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FIG. 3 



□ specifier = address + (size/2) + (width/2)



depth = 4 bytes


width = 16 bytes; size = depth x width = 64 bytes


address is aligned to size (64 bytes) 
so low-order 6 bits are zero 


ooooaaaaaaaaaaaaoaao aa aaaa 0aaaaaaaaa a \ ooqooq] 
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FIG. 5 
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FIG. 7 



□ wmc.c - contents


□ wmc.pa - physical address
□ wmc.size - size of contents
□ wmc.cv - contents valid
□ wmc.th - thread last used

□ wmc.reg - register last used
□ wmc.rtv - register & thread valid


FIG. 9 
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Fig. 13 
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def data.flags <- AccessPhysicalLTB(pa,op,wdata) as 
th <- pa_{23..19}
en <- pa_{6..4}

if (en < (1 || 0^{LE})) and (th < T) and (pa_{18..6} = 0) then
case op of
R:

data <- 0^{64} || LTBArray[th][en]

W:

LocalTB[th][en] <- wdata_{63..0}

endcase 

else 

data <- 0 

endif 
enddef 

Fig. 16 
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Fig. 21 


def ga,LocalProtect <- LocalTranslation(th,ba,la,pl) as
if LB & (ba 6 3..48 © I363..48) then 

raise AccessDisallowedByVirtualAddress 

endif 

me <- NONE 
for i <- 0 to (1 || 0^{LE})-1

if (la_{63..48} & ~LocalTB[th][i]_{63..48}) = LocalTB[th][i]_{47..32} then
me <- i

endif 
endfor 

if me = NONE then 

if ~ControlRegister_{pl+8} then
raise LocalTBMiss 

endif 
ga <- la 

LocalProtect <- 0 

else 

ga <- (va_{63..48} ^ LocalTB[th][me]_{31..16}) || va_{47..0}
LocalProtect <- LocalTB[th][me]_{15..0}

endif 
enddef 

Fig. 22 
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Fig. 23 


def data,flags <- AccessPhysicalGTB(pa,op,wdata) as 
th <- pa_{23..19+GT} || 0^{GT}
en <- pa_{15..4}

if (en < (1 || 0^{G})) and (th < T) and (pa_{18+GT..19} = 0) then
case op of
R:

data <- GTBArray[th_{5..GT}][en]

W:

GTBArray[th_{5..GT}][en] <- wdata

endcase 

else 

data <- 0 

endif 

enddef 

Fig. 24 
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def pa.GlobalProtect <- GlobalAddressTranslation(th,ga,pl,lda) as 
me <- NONE 
for i <- 0 to (1 || 0^{G})-1

if GlobalTB[th_{5..GT}][i] ≠ 0 then

size <- (GlobalTB[th_{5..GT}][i]_{63..7} and (0^{64}-GlobalTB[th_{5..GT}][i]_{63..7})) || 0^{8}
if (((ga_{63..8} || 0^{8}) ^ (GlobalTB[th_{5..GT}][i]_{63..8} || 0^{8})) and (0^{64}-size)) = 0 then
me <- GlobalTB[th_{5..GT}][i]

endif 

endif 
endfor 

if me = NONE then 
if lda then

PerformAccessDetail(AccessDetailRequiredByLocalTB)

endif 

raise GlobalTBMiss 

else 

pa <- (ga_{63..8} ^ GlobalTB[th_{5..GT}][me]_{127..72}) || ga_{7..0}

GlobalProtect <- GlobalTB[th_{5..GT}][me]_{71..64} || 0^{1} || GlobalTB[th_{5..GT}][me]_{6..0}

endif 
enddef 


Fig. 26 


def GTBUpdateWrite(th,fill,data) as
me <- NONE
for i <- 0 to (1 || 0^{G})-1

size <- (GlobalTB[th_{5..GT}][i]_{63..7} and (0^{64}-GlobalTB[th_{5..GT}][i]_{63..7})) || 0^{8}
if ((data_{63..8} || 0^{8}) ^ (GlobalTB[th_{5..GT}][i]_{63..8} || 0^{8})) and (0^{64}-size) = 0 then
me <- i

endif
endfor 

if me = NONE then 
if fill then 

GlobalTB[th_{5..GT}][GTBLast[th_{5..GT}]] <- data
GTBLast[th_{5..GT}] <- (GTBLast[th_{5..GT}] + 1)_{G-1..0}
if GTBLast[th_{5..GT}] = 0 then

GTBLast[th_{5..GT}] <- GTBFirst[th_{5..GT}]

GTBBump[th_{5..GT}] <- 1

endif

endif 

else 

GlobalTB[th_{5..GT}][me] <- data

endif 
enddef 

Fig. 27 
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Fig. 29 


def data,flags <- AccessPhysicalGTBRegisters(pa,op,wdata) as
th <- pa_{23..19+GT} || 0^{GT}
rn <- pa_{18..8}

if (rn < 5) and (th < T) and (pa_{18+GT..19} = 0) and (pa_{7..4} = 0) then
case rn || op of 
0 || R, 1 || R: 

data <- 0 
0 || W, 1 || W: 

GTBUpdateWrite(th,rn_{0},wdata)

2 || R:

data <- 0^{64-G} || GTBLast[th_{5..GT}]

2 || W:

GTBLast[th_{5..GT}] <- wdata_{G-1..0}

3 || R:

data <- 0^{64-G} || GTBFirst[th_{5..GT}]
3 || W:

GTBFirst[th_{5..GT}] <- wdata_{G-1..0}
4 || R:

data <- 0^{63} || GTBBump[th_{5..GT}]

4 || W:

GTBBump[th_{5..GT}] <- wdata_{0}

endcase 

else 

data <- 0 

end if 
enddef 


Fig. 30 


G.BOOLEAN | Group Boolean

Equivalencies 


G.AAA 

Group three-way and 

G.AAA.1

Group add add add bits

G.AAS.1

Group add add subtract bits

G.ADD.1

Group add bits

G.AND

Group and

G.ANDN

Group and not

G.COPY

Group copy

G.NAAA

Group three-way nand

G.NAND

Group nand

G.NOOO

Group three-way nor

G.NOR

Group nor

G.NOT

Group not

G.NXXX

Group three-way exclusive-nor

G.OOO

Group three-way or

G.OR

Group or

G.ORN

Group or not

G.SAA.1

Group subtract add add bits

G.SAS.1

Group subtract add subtract bits

G.SET

Group set

G.SET.AND.E.1

Group set and equal zero bits

G.SET.AND.NE.1

Group set and not equal zero bits

G.SET.E.1

Group set equal bits

G.SET.G.1

Group set greater signed bits

G.SET.G.U.1

Group set greater unsigned bits

G.SET.G.Z.1

Group set greater zero signed bits

G.SET.GE.1

Group set greater equal signed bits

G.SET.GE.Z.1

Group set greater equal zero signed bits 

G.SET.L.1 

Group set less signed bits 

G.SET.L.Z.1 

Group set less zero signed bits 

G.SET.LE.1 

Group set less equal signed bits j 

G.SET.LE.U.1 

Group set less equal unsigned bits 

G.SET.LE.Z.1 

Group set less equal zero signed bits ! 

G.SET.NE.1 

Group set not equal bits 

G.SET.GE.U.1 

Group set greater equal unsigned bits 

G.SET.L.U.1 

Group set less unsigned bits 


Fig. 31A 


G.SSA.1 

Group subtract subtract add bits 

G.SSS.1 

Group subtract subtract subtract bits 

G.SUB.1 

Group subtract bits 

G.XNOR 

Group exclusive-nor j 

G.XOR 

Group exclusive-or 

G.XXX 

Group three-way exclusive-or 

G.ZERO 

Group zero 


G.AAA rd@rc,rb 

<— 

G.BOOLEAN rd@rc,rb,0b10000000

G.AAA.1 rd@rc,rb

->

G.XXX rd@rc,rb 

G.AAS.1 rd@rc,rb

->

G.XXX rd@rc,rb

G.ADD.1 rd=rc,rb

-> 

G.XOR rd=rc,rb 

G.AND rd=rc,rb 

<- 

G.BOOLEAN rd@rc,rb,0b10001000

G.ANDN rd=rc,rb

<-

G.BOOLEAN rd@rc,rb,0b01000100

G.BOOLEAN rd@rb,rc,i

->

G.BOOLEAN rd@rc,rb,i_{7} || i_{5} || i_{6} || i_{4} || i_{3} || i_{1} || i_{2} || i_{0}

G.COPY rd=rc

<- 

G.BOOLEAN rd@rc,rc,0b10001000

G.NAAA rd@rc,rb

<-

G.BOOLEAN rd@rc,rb,0b01111111

G.NAND rd=rc,rb

<-

G.BOOLEAN rd@rc,rb,0b01110111

G.NOOO rd@rc,rb 

<- 

G.BOOLEAN rd@rc,rb,0b00000001 

G.NOR rd=rc,rb 

<- 

G.BOOLEAN rd@rc,rb,0b00010001

G.NOT rd=rc

<-

G.BOOLEAN rd@rc,rc,0b00010001

G.NXXX rd@rc,rb

<-

G.BOOLEAN rd@rc,rb,0b01101001

G.OOO rd@rc,rb

<-

G.BOOLEAN rd@rc,rb,0b11111110

G.OR rd=rc,rb

<-

G.BOOLEAN rd@rc,rb,0b11101110

G.ORN rd=rc,rb

<-

G.BOOLEAN rd@rc,rb,0b11011101

G.SAA.1 rd@rc,rb 

—> 

G.XXX rd@rc,rb

G.SAS.1 rd@rc,rb

->

G.XXX rd@rc,rb

G.SET rd

<-

G.BOOLEAN rd@rd,rd,0b10000001

G.SET.AND.E.1 rd=rb,rc

->

G.NAND rd=rc,rb

G.SET.AND.NE.1 rd=rb,rc

->

G.AND rd=rc,rb

G.SET.E.1 rd=rb,rc

->

G.XNOR rd=rc,rb

G.SET.G.1 rd=rb,rc

->

G.ANDN rd=rc,rb

G.SET.G.U.1 rd=rb,rc

->

G.ANDN rd=rb,rc

G.SET.G.Z.1 rd=rc

->

G.ZERO rd

G.SET.GE.1 rd=rb,rc

->

G.ORN rd=rc,rb

G.SET.GE.Z.1 rd=rc

->

G.NOT rd=rc


Fig. 31 A (cont'd) 


G.SET.L.1 rd=rb,rc

->

G.ANDN rd=rb,rc

G.SET.L.Z.1 rd=rc

->

G.COPY rd=rc

G.SET.LE.1 rd=rb,rc

->

G.ORN rd=rb,rc

G.SET.LE.U.1 rd=rb,rc

->

G.ORN rd=rc,rb

G.SET.LE.Z.1 rd=rc

->

G.SET rd

G.SET.NE.1 rd=rb,rc

->

G.XOR rd=rc,rb

G.SET.GE.U.1 rd=rb,rc

->

G.ORN rd=rb,rc

G.SET.L.U.1 rd=rb,rc

->

G.ANDN rd=rc,rb

G.SSA.1 rd@rc,rb

->

G.XXX rd@rc,rb

G.SSS.1 rd@rc,rb

->

G.XXX rd@rc,rb

G.SUB.1 rd=rc,rb 

-> 

G.XOR rd=rc,rb 

G.XNOR rd=rc,rb 

<- 

G.BOOLEAN rd@rc,rb,0b10011001

G.XOR rd=rc,rb

<-

G.BOOLEAN rd@rc,rb,0b01100110

G.XXX rd@rc,rb

<-

G.BOOLEAN rd@rc,rb,0b10010110

G.ZERO rd 

<- 

G.BOOLEAN rd@rd,rd,0b00000000 


Selection 


operation 

function (binary) 

function (decimal) 

d 

11110000 

240 

c 

11001100 

204 

b 

10101010 

170

d&c&b 

10000000 

128 

(d&c)|b 

11101010 

234 | 

d|c|b 

11111110 

254 

d?c:b 

11001010 

202 | 

d^c^b

10010110

150

~d^c^b

01101001 

105 

0 

00000000 

0 


Fig. 31 A (cont'd) 


Format 

G.BOOLEAN rd@trc,trb,f
rd=gbooleani(rd,rc,rb,f)

31 25 24 23 18 17 12 11 6 5 0

| G.BOOLEAN | ih | rd | rc | rb | il |

7 1 6 6 6 6

if f_{6}=f_{5} then

if f_{2}=f_{1} then

if f_{2} then

rc <- max(trc,trb)
rb <- min(trc,trb)

else

rc <- min(trc,trb)
rb <- max(trc,trb)

endif
ih <- 0

il <- 0 || f_{6} || f_{7} || f_{4} || f_{3} || f_{0}

else 

if f2 then 

rc <- trb 
rb <- trc 

else 

rc <- trc 
rb <- trb 

endif 
ih <- 0

il <- 1 || f_{6} || f_{7} || f_{4} || f_{3} || f_{0}

endif 

else 

ih <- 1
if f_{6} then

rc <- trb

rb <- trc

il <- f_{1} || f_{2} || f_{7} || f_{4} || f_{3} || f_{0}

else

rc <- trc
rb <- trb

il <- f_{2} || f_{1} || f_{7} || f_{4} || f_{3} || f_{0}

endif 

endif 

Fig. 31B 


Definition

def GroupBoolean (ih,rd,rc,rb,il)
d <- RegRead(rd, 128)
c <- RegRead(rc, 128)
b <- RegRead(rb, 128)
if ih=0 then

if il_{5}=0 then

f <- il_{3} || il_{4} || il_{4} || il_{2} || il_{1} || (rc>rb)^{2} || il_{0}

else

f <- il_{3} || il_{4} || il_{4} || il_{2} || il_{1} || 0 || 1 || il_{0}

endif

else

f <- il_{3} || 0 || 1 || il_{2} || il_{1} || il_{5} || il_{4} || il_{0}

endif

for i <- 0 to 127 by size

a_{i} <- f_{(d_{i} || c_{i} || b_{i})}
endfor 

RegWrite(rd, 128, a) 
enddef 


Exceptions 

none 


Fig. 31C 


Operation codes 


G.MUX

Group multiplex 

i 

Redundancies 

G.MUX ra=rd,rc,rc 

<=> 

G.COPYra=rc 

G.MUX ra=ra,rc,rb

<=>

G.BOOLEAN ra@rc,rb,0b11001010

G.MUX ra=rd,ra,rb

<=>

G.BOOLEAN ra@rd,rb,0b11100010

G.MUX ra=rd,rc,ra

<=>

G.BOOLEAN ra@rd,rc,0b11011000

G.MUX ra=rd,rd,rb 

<=> 

G.OR ra=rd,rb 

G.MUX ra=rd,rc,rd 

<=> 

G.AND ra=rd,rc 


Format 


G.MUX ra=rd,rc,rb 
ra=gmux(rd,rc,rb) 

31 24 23 18 17 12 11 6 5 0 

| G.MUX | rd | rc | rb | ra |

8 6 6 6 6 


Fig. 31D 


Definition 

def GroupTernary(op,size,rd,rc,rb,ra) as 
d <- RegRead(rd, 128) 
c <- RegRead(rc, 128) 
b <- RegRead(rb, 128) 
case op of 
G.MUX: 

a <- (c and d) or (b and not d)

endcase 

RegWrite(ra, 128, a) 
enddef 

Exceptions 

none 

Fig. 31D 


G.ADD.8 

Group add bytes 

G.ADD.16 

Group add doublets 

G.ADD.32 

Group add quadlets 

G.ADD.64 

Group add octlets 

G.ADD.128 

Group add hexlet 

G.ADD.L.8 

Group add limit signed bytes 

G.ADD.L.16

Group add limit signed doublets

G.ADD.L.32

Group add limit signed quadlets

G.ADD.L.64

Group add limit signed octlets

G.ADD.L.128

Group add limit signed hexlet

G.ADD.L.U.8

Group add limit unsigned bytes

G.ADD.L.U.16

Group add limit unsigned doublets

G.ADD.L.U.32

Group add limit unsigned quadlets 

G.ADD.L.U.64 

Group add limit unsigned octlets 

G.ADD.L.U.128 

Group add limit unsigned hexlet 

G.ADD.8.O

Group add signed bytes check overflow

G.ADD.16.O

Group add signed doublets check overflow

G.ADD.32.O

Group add signed quadlets check overflow

G.ADD.64.O

Group add signed octlets check overflow

G.ADD.128.O

Group add signed hexlet check overflow

G.ADD.U.8.O

Group add unsigned bytes check overflow

G.ADD.U.16.O

Group add unsigned doublets check overflow

G.ADD.U.32.O

Group add unsigned quadlets check overflow

G.ADD.U.64.O

Group add unsigned octlets check overflow

G.ADD.U.128.O

Group add unsigned hexlet check overflow 


Fig. 32A 


Format 


G.op.size rd=rc,rb 
rd=gopsize(rc,rb) 

31 24 23 18 17 12 11 6 5 0

| G.size | rd | rc | rb | op |

8 6 6 6 6 


Fig. 32B 


Definition 

def Group(op,size,rd,rc,rb) 
c <- RegRead(rc, 128) 
b <- RegRead(rb, 128) 
case op of 
G.ADD: 

for i <- 0 to 128-size by size 

a_{i+size-1..i} <- c_{i+size-1..i} + b_{i+size-1..i}
endfor
G.ADD.L:

for i <- 0 to 128-size by size

t <- (c_{i+size-1} || c_{i+size-1..i}) + (b_{i+size-1} || b_{i+size-1..i})

a_{i+size-1..i} <- (t_{size} ≠ t_{size-1}) ? (t_{size} || t_{size-1}^{size-1}) : t_{size-1..0}

endfor
G.ADD.L.U:

for i <- 0 to 128-size by size

t <- (0^{1} || c_{i+size-1..i}) + (0^{1} || b_{i+size-1..i})
a_{i+size-1..i} <- (t_{size} ≠ 0) ? (1^{size}) : t_{size-1..0}
endfor
G.ADD.O:

for i <- 0 to 128-size by size

t <- (c_{i+size-1} || c_{i+size-1..i}) + (b_{i+size-1} || b_{i+size-1..i})
if t_{size} ≠ t_{size-1} then

raise FixedPointArithmetic

endif

a_{i+size-1..i} <- t_{size-1..0}
endfor
G.ADD.U.O:

for i <- 0 to 128-size by size

t <- (0^{1} || c_{i+size-1..i}) + (0^{1} || b_{i+size-1..i})
if t_{size} ≠ 0 then

raise FixedPointArithmetic

endif

a_{i+size-1..i} <- t_{size-1..0}
endfor

endcase

RegWrite(rd, 128, a)
enddef 


Exceptions 

Fixed-point arithmetic 


Fig. 32C 


Operation codes 


G.SET.AND.E.8 

Group set and equal zero bytes 

G.SET.AND.E.16 

Group set and equal zero doublets 

G.SET.AND.E.32 

Group set and equal zero quadlets 

G.SET.AND.E.64 

Group set and equal zero octlets 

G.SET.AND.E.128 

Group set and equal zero hexlet 

G.SET.AND.NE.8 

Group set and not equal zero bytes 

G.SET.AND.NE.16 

Group set and not equal zero doublets 

G.SET.AND.NE.32 

Group set and not equal zero quadlets 

G.SET.AND.NE.64 

Group set and not equal zero octlets 

G.SET.AND.NE.128 

Group set and not equal zero hexlet 

G.SET.E.8 

Group set equal bytes 

G.SET.E.16 

Group set equal doublets j 

G.SET.E.32 

Group set equal quadlets 

G.SET.E.64 

Group set equal octlets 

G.SET.E.128 

Group set equal hexlet 

G.SET.GE.8 

Group set greater equal signed bytes 

G.SET.GE.16 

Group set greater equal signed doublets 

G.SET.GE.32 

Group set greater equal signed quadlets 

G.SET.GE.64 

Group set greater equal signed octlets 

G.SET.GE.128 

Group set greater equal signed hexlet 

G.SET.GE.U.8 

Group set greater equal unsigned bytes 

G.SET.GE.U.16 

Group set greater equal unsigned doublets 

G.SET.GE.U.32 

Group set greater equal unsigned quadlets 

G.SET.GE.U.64 

Group set greater equal unsigned octlets 

G.SET.GE.U.128 

Group set greater equal unsigned hexlet 

G.SET.L.8 

Group set signed less bytes 

G.SET.L.16

Group set signed less doublets

G.SET.L.32

Group set signed less quadlets

G.SET.L.64

Group set signed less octlets

G.SET.L.128

Group set signed less hexlet

G.SET.L.U.8

Group set less unsigned bytes

G.SET.L.U.16

Group set less unsigned doublets

G.SET.L.U.32

Group set less unsigned quadlets

G.SET.L.U.64

Group set less unsigned octlets

G.SET.L.U.128

Group set less unsigned hexlet

G.SET.NE.8

Group set not equal bytes

G.SET.NE.16 

Group set not equal doublets 


Fig. 33A 


G.SET.NE.32 

Group set not equal quadlets 

G.SET.NE.64 

Group set not equal octlets 

G.SET.NE.128 

Group set not equal hexlet 

G.SUB.8 

Group subtract bytes 

G.SUB.8.O

Group subtract signed bytes check overflow

G.SUB.16

Group subtract doublets

G.SUB.16.O

Group subtract signed doublets check overflow

G.SUB.32

Group subtract quadlets

G.SUB.32.O

Group subtract signed quadlets check overflow

G.SUB.64

Group subtract octlets

G.SUB.64.O

Group subtract signed octlets check overflow

G.SUB.128

Group subtract hexlet

G.SUB.128.O

Group subtract signed hexlet check overflow

G.SUB.L.8

Group subtract limit signed bytes

G.SUB.L.16

Group subtract limit signed doublets

G.SUB.L.32

Group subtract limit signed quadlets

G.SUB.L.64

Group subtract limit signed octlets

G.SUB.L.128

Group subtract limit signed hexlet

G.SUB.L.U.8

Group subtract limit unsigned bytes

G.SUB.L.U.16

Group subtract limit unsigned doublets

G.SUB.L.U.32

Group subtract limit unsigned quadlets

G.SUB.L.U.64

Group subtract limit unsigned octlets

G.SUB.L.U.128

Group subtract limit unsigned hexlet

G.SUB.U.8.O

Group subtract unsigned bytes check overflow

G.SUB.U.16.O

Group subtract unsigned doublets check overflow

G.SUB.U.32.O

Group subtract unsigned quadlets check overflow

G.SUB.U.64.O

Group subtract unsigned octlets check overflow

G.SUB.U.128.O

Group subtract unsigned hexlet check overflow


Fig. 33A (cont'd) 


Equivalencies 


G.SET.E.Z.8 

Group set equal zero bytes 

G.SET.E.Z.16 

Group set equal zero doublets 

G.SET.E.Z.32 

Group set equal zero quadlets 

G.SET.E.Z.64 

Group set equal zero octlets 

G.SET.E.Z.128 

Group set equal zero hexlet 

G.SET.G.Z.8 

Group set greater zero signed bytes 

G.SET.G.Z.16 

Group set greater zero signed doublets I 

G.SET.G.Z.32 

Group set greater zero signed quadlets 

G.SET.G.Z.64 

Group set greater zero signed octlets 

G.SET.G.Z.128 

Group set greater zero signed hexlet 

G.SET.GE.Z.8 

Group set greater equal zero signed bytes 

G.SET.GE.Z.16 

Group set greater equal zero signed doublets 

G.SET.GE.Z.32 

Group set greater equal zero signed quadlets 

G.SET.GE.Z.64 

Group set greater equal zero signed octlets | 

G.SET.GE.Z.128 

Group set greater equal zero signed hexlet j 

G.SET.L.Z.8 

Group set less zero signed bytes 

G.SET.L.Z.16 

Group set less zero signed doublets j 

G.SET.L.Z.32 

Group set less zero signed quadlets 

G.SET.L.Z.64 

Group set less zero signed octlets j 

G.SET.L.Z.128 

Group set less zero signed hexlet 

G.SET.LE.Z.8 

Group set less equal zero signed bytes 

G.SET.LE.Z.16 

Group set less equal zero signed doublets 

G.SET.LE.Z.32 

Group set less equal zero signed quadlets 

G.SET.LE.Z.64 

Group set less equal zero signed octlets ! 

G.SET.LE.Z.128 

Group set less equal zero signed hexlet 

G.SET.NE.Z.8 

Group set not equal zero bytes 

G.SET.NE.Z.16 

Group set not equal zero doublets 

G.SET.NE.Z.32 

Group set not equal zero quadlets 

G.SET.NE.Z.64 

Group set not equal zero octlets 

G.SET.NE.Z.128 

Group set not equal zero hexlet 


Fig. 33A '(cont'd) 


G.SET.LE.8 

Group set less equal signed bytes 

G.SET.LE.16 

Group set less equal signed doublets 

G.SET.LE.32 

Group set less equal signed quadlets 

G.SET.LE.64 

Group set less equal signed octlets 

G.SET.LE.128 

Group set less equal signed hexlet 

G.SET.LE.U.8 

Group set less equal unsigned bytes 

G.SET.LE.U.16 

Group set less equal unsigned doublets 

G.SET.LE.U.32 

Group set less equal unsigned quadlets 

G.SET.LE.U.64 

Group set less equal unsigned octlets 

G.SET.LE.U.128 

Group set less equal unsigned hexlet 

G.SET.G.8 

Group set signed greater bytes 

G.SET.G.16 

Group set signed greater doublets 

G.SET.G.32 

Group set signed greater quadlets 

G.SET.G.64 

Group set signed greater octlets 

G. SET. G. 128 

Group set signed greater hexlet 

G.SET.G.U.8 

Group set greater unsigned bytes 

G.SET.G.U.16 

Group set greater unsigned doublets 

G.SET.G.U.32 

Group set greater unsigned quadlets 

G.SET.G.U.64 

Group set greater unsigned octlets j 

G.SET.G.U.128 

Group set greater unsigned hexlet 


G.SET.E.Z.size rd=rc 

<— 

G.SET.AND.E.size rd=rc,rc 

G.SET.G.Z.size rd=rc 

<= 

G.SET.L.U.size rd=rc,rc

G.SET.GE.Z.size rd=rc

<=

G.SET.GE.size rd=rc,rc

G.SET.L.Z.size rd=rc

<= 

G.SET.L.size rd=rc,rc 

G.SET.LE.Z.size rd=rc 

<= 

G.SET.GE.U.size rd=rc,rc 

G.SET.NE.Z.size rd=rc

<— 

G.SET.AND.NE.size rd=rc,rc 

G.SET.G.size rd=rb,rc 

-> 

G.SET.L.size rd=rc,rb 

G.SET.G.U.size rd=rb,rc 

-> 

G.SET.L.U.size rd=rc,rb | 

G.SET.LE.size rd=rb,rc 

-> 

G.SET.GE.size rd=rc,rb 

G.SET.LE.U.size rd=rb,rc 

-> 

G.SET.GE.U.size rd=rc,rb 


Fig. 33A (cont'd) 


Format 


G.op.size rd=rb,rc 
rd=gopsize(rb,rc) 

31 24 23 18 17 12 11 6 5 0

| G.size | rd | rc | rb | op |

8 6 6 6 6 


Fig. 33B 


Definition 


def GroupReversed(op,size,rd,rc,rb)
c <- RegRead(rc, 128)
b <- RegRead(rb, 128)
case op of
G.SUB:

for i <- 0 to 128-size by size

a_{i+size-1..i} <- b_{i+size-1..i} - c_{i+size-1..i}
endfor
G.SUB.L:

for i <- 0 to 128-size by size

t <- (b_{i+size-1} || b_{i+size-1..i}) - (c_{i+size-1} || c_{i+size-1..i})

a_{i+size-1..i} <- (t_{size} ≠ t_{size-1}) ? (t_{size} || t_{size-1}^{size-1}) : t_{size-1..0}
endfor
G.SUB.L.U:

for i <- 0 to 128-size by size

t <- (0^{1} || b_{i+size-1..i}) - (0^{1} || c_{i+size-1..i})

a_{i+size-1..i} <- (t_{size} ≠ 0) ? 0^{size} : t_{size-1..0}
endfor
G.SUB.O:

for i <- 0 to 128-size by size

t <- (b_{i+size-1} || b_{i+size-1..i}) - (c_{i+size-1} || c_{i+size-1..i})
if (t_{size} ≠ t_{size-1}) then

raise FixedPointArithmetic

endif

a_{i+size-1..i} <- t_{size-1..0}
endfor
G.SUB.U.O:

for i <- 0 to 128-size by size

t <- (0^{1} || b_{i+size-1..i}) - (0^{1} || c_{i+size-1..i})
if (t_{size} ≠ 0) then

raise FixedPointArithmetic

endif

a_{i+size-1..i} <- t_{size-1..0}
endfor
G.SET.E:

for i <- 0 to 128-size by size

a_{i+size-1..i} <- (b_{i+size-1..i} = c_{i+size-1..i})^{size}
endfor
G.SET.NE:

for i <- 0 to 128-size by size

a_{i+size-1..i} <- (b_{i+size-1..i} ≠ c_{i+size-1..i})^{size}
endfor
G.SET.AND.E:

for i <- 0 to 128-size by size

a_{i+size-1..i} <- ((b_{i+size-1..i} and c_{i+size-1..i}) = 0)^{size}
endfor 


Fig. 33C 


G.SET.AND.NE: 

for i <- 0 to 128-size by size

a_{i+size-1..i} <- ((b_{i+size-1..i} and c_{i+size-1..i}) ≠ 0)^{size}
endfor
G.SET.L:

for i <- 0 to 128-size by size

a_{i+size-1..i} <- ((rc = rb) ? (b_{i+size-1..i} < 0) : (b_{i+size-1..i} < c_{i+size-1..i}))^{size}
endfor
G.SET.GE:

for i <- 0 to 128-size by size

a_{i+size-1..i} <- ((rc = rb) ? (b_{i+size-1..i} ≥ 0) : (b_{i+size-1..i} ≥ c_{i+size-1..i}))^{size}
endfor
G.SET.L.U:

for i <- 0 to 128-size by size

a_{i+size-1..i} <- ((rc = rb) ? (b_{i+size-1..i} > 0) :
((0 || b_{i+size-1..i}) < (0 || c_{i+size-1..i})))^{size}

endfor
G.SET.GE.U:

for i <- 0 to 128-size by size

a_{i+size-1..i} <- ((rc = rb) ? (b_{i+size-1..i} ≤ 0) :
((0 || b_{i+size-1..i}) ≥ (0 || c_{i+size-1..i})))^{size}

endfor 

endcase 

RegWrite(rd, 128, a) 
enddef 


Exceptions 

Fixed-point arithmetic 


Fig. 33C (cont'd) 


5 


E.DIV.64 

Ensemble divide signed octlets 

E.DIV.U.64 

Ensemble divide unsigned octlets 

E.MUL.8 

Ensemble multiply signed bytes 

E.MUL.16 

Ensemble multiply signed doublets 

E.MUL.32 

Ensemble multiply signed quadlets 

E.MUL.64 

Ensemble multiply signed octlets 

E.MUL.SUM.8 

Ensemble multiply sum signed bytes 

E.MUL.SUM.16 

Ensemble multiply sum signed doublets 

E.MUL.SUM.32 

Ensemble multiply sum signed quadlets 

E.MUL.SUM.64

Ensemble multiply sum signed octlets

E.MUL.C.8

Ensemble complex multiply bytes

E.MUL.C.16 

Ensemble complex multiply doublets 

E.MUL.C.32 

Ensemble complex multiply quadlets 

E.MUL.M.8 

Ensemble multiply mixed-signed bytes 

E.MUL.M.16 

Ensemble multiply mixed-signed doublets 

E.MUL.M.32 

Ensemble multiply mixed-signed quadlets 

E.MUL.M.64 

Ensemble multiply mixed-signed octlets 

E.MUL.P.8 

Ensemble multiply polynomial bytes 

E.MUL.P.16 

Ensemble multiply polynomial doublets 

E.MUL.P.32 

Ensemble multiply polynomial quadlets 

E.MUL.P.64 

Ensemble multiply polynomial octlets 

E.MUL.SUM.C.8

Ensemble multiply sum complex bytes 

E.MUL.SUM.C.16 

Ensemble multiply sum complex doublets 

E.MUL.SUM.C.32 

Ensemble multiply sum complex quadlets 

E.MUL.SUM.M.8 

Ensemble multiply sum mixed-signed bytes 

E.MUL.SUM.M.16

Ensemble multiply sum mixed-signed doublets 

E.MUL.SUM.M.32 

Ensemble multiply sum mixed-signed quadlets 

E.MUL.SUM.M.64 

Ensemble multiply sum mixed-signed octlets 

E.MUL.SUM.U.8 

Ensemble multiply sum unsigned bytes 

E.MUL.SUM.U.16

Ensemble multiply sum unsigned doublets

E.MUL.SUM.U.32

Ensemble multiply sum unsigned quadlets

E.MUL.SUM.U.64

Ensemble multiply sum unsigned octlets

E.MUL.U.8

Ensemble multiply unsigned bytes

E.MUL.U.16

Ensemble multiply unsigned doublets

E.MUL.U.32

Ensemble multiply unsigned quadlets

E.MUL.U.64

Ensemble multiply unsigned octlets


Fig. 34A 


Format 

E.op.size rd=rc,rb 
rd=eopsize(rc,rb) 

31 24 23 18 17 12 11 6 5 0

| E.size | rd | rc | rb | op |

8 6 6 6 6 


Fig. 34B 


Definition 


def mul(size,h,vs,v,i,ws,w,j) as

mul <- ((vs&v_{size-1+i})^{h-size} || v_{size-1+i..i}) * ((ws&w_{size-1+j})^{h-size} || w_{size-1+j..j})
enddef

def c <- PolyMultiply(size,a,b) as

p[0] <- 0^{2*size}
for k <- 0 to size-1

p[k+1] <- p[k] ^ a_{k} ? (0^{size-k} || b || 0^{k}) : 0^{2*size}

endfor
c <- p[size]
enddef 

def Ensemble(op,size,rd,rc,rb) 
c <- RegRead(rc, 128) 
b <- RegRead(rb, 128) 
case op of 

E.MUL:, E.MUL.C:, E.MUL.SUM, E.MUL.SUM.C, E.CON, E.CON.C, E.DIV:

cs <- bs <- 1
E.MUL.M:, E.MUL.SUM.M, E.CON.M:

cs <- 0

bs <- 1

E.MUL.U:, E.MUL.SUM.U, E.CON.U, E.DIV.U, E.MUL.P:
cs <- bs <- 0

endcase 
case op of 

E.MUL, E.MUL.U, E.MUL.M:

for i <- 0 to 64-size by size

d_{2*(i+size)-1..2*i} <- mul(size,2*size,cs,c,i,bs,b,i)

endfor
E.MUL.P:

for i <- 0 to 64-size by size

d_{2*(i+size)-1..2*i} <- PolyMultiply(size,c_{size-1+i..i},b_{size-1+i..i})

endfor
E.MUL.C:

for i <- 0 to 64-size by size
if (i and size) = 0 then

p <- mul(size,2*size,1,c,i,1,b,i) - mul(size,2*size,1,c,i+size,1,b,i+size)

else

p <- mul(size,2*size,1,c,i,1,b,i+size) + mul(size,2*size,1,c,i,1,b,i+size)

endif

d_{2*(i+size)-1..2*i} <- p

endfor 

E.MUL.SUM, E.MUL.SUM.U, E.MUL.SUM.M:
p[0] <- 0^{128}

for i <- 0 to 128-size by size

p[i+size] <- p[i] + mul(size,128,cs,c,i,bs,b,i)

endfor
a <- p[128]
E.MUL.SUM.C:

p[0] <- 0^{64}
p[size] <- 0^{64}

for i <- 0 to 128-size by size
if (i and size) = 0 then

p[i+2*size] <- p[i] + mul(size,64,1,c,i,1,b,i)

- mul(size,64,1,c,i+size,1,b,i+size)

else

p[i+2*size] <- p[i] + mul(size,64,1,c,i,1,b,i+size)

+ mul(size,64,1,c,i+size,1,b,i)

endif

endfor

a <- p[128+size] || p[128]

Fig. 34C 


E.CON, E.CON.U, E.CON.M: 
p[0]<-0 128 

for j <- 0 to 64-size by size 

for i <- 0 to 64-size by size 

p[j+size]_{2*(i+size)-1..2*i} <- p[j]_{2*(i+size)-1..2*i} +
mul(size,2*size,cs,c,i+64-j,bs,b,j)

endfor 

endfor 
a <- p[64] 
E.CON.C: 

p[0] <- 0 128 

for j <- 0 to 64-size by size 

for i <- 0 to 64-size by size 

if ((-i) and j and size) = 0 then 

pD+size] 2 *(i + size)-1..2*i <- PD]2*(i+size)-1..2*i 
mul(size,2*size l 1 f c,i+64-j,1,bj) 

else 

pO+size]2*(j+ S jze)-1..2*i <- PDl2*(i+size)-1..2*i 
mul(size,2*size,1 I c,i+64-j+2*size,1,bj) 

endif 

endfor 

endfor 
a <- p[64] 

E.DIV: 

if (b = 0) or ( (c = (1 || 0^{63})) and (b = 1^{64}) ) then
a <- undefined

else

q <- c / b

r <- c - q*b

a <- r_{63..0} || q_{63..0}

endif
E.DIV.U:

if b = 0 then

a <- undefined

else

q <- (0 || c) / (0 || b)
r <- c - (0 || q)*(0 || b)
a <- r_{63..0} || q_{63..0}

endif 

endcase 

RegWrite(rd, 128, a) 
enddef 


Exceptions 

none 


Fig. 34C (cont'd) 


G.COM.AND.E.8

Group compare and equal zero bytes

G.COM.AND.E.16

Group compare and equal zero doublets

G.COM.AND.E.32 

Group compare and equal zero quadlets 

G.COM.AND.E.64 

Group compare and equal zero octlets 

G.COM.AND.E.128 

Group compare and equal zero hexlet 

G.COM.AND.NE.8

Group compare and not equal zero bytes

G.COM.AND.NE.16

Group compare and not equal zero doublets 

G.COM.AND.NE.32 

Group compare and not equal zero quadlets 

G.COM.AND.NE.64 

Group compare and not equal zero octlets 

G.COM.AND.NE.128 

Group compare and not equal zero hexlet 

G.COM.E.8

Group compare equal bytes

G.COM.E.16

Group compare equal doublets

G.COM.E.32

Group compare equal quadlets

G.COM.E.64 

Group compare equal octlets 

G.COM.E.128 

Group compare equal hexlet 

G.COM.GE.8

Group compare greater equal signed bytes

G.COM.GE.16

Group compare greater equal signed doublets 

G.COM.GE.32 

Group compare greater equal signed quadlets 

G.COM.GE.64 

Group compare greater equal signed octlets 

G.COM.GE.128 

Group compare greater equal signed hexlet j 

G.COM.GE.U.8

Group compare greater equal unsigned bytes

G.COM.GE.U.16

Group compare greater equal unsigned doublets 

G.COM.GE.U.32 

Group compare greater equal unsigned quadlets 

G.COM.GE.U.64 

Group compare greater equal unsigned octlets 

G.COM.GE.U.128 

Group compare greater equal unsigned hexlet 

G.COM.L.8

Group compare signed less bytes

G.COM.L.16

Group compare signed less doublets 

G.COM.L.32 

Group compare signed less quadlets 

G.COM.L.64 

Group compare signed less octlets 

G.COM.L.128 

Group compare signed less hexlet 

G.COM.L.U.8

Group compare less unsigned bytes

G.COM.L.U.16

Group compare less unsigned doublets

G.COM.L.U.32

Group compare less unsigned quadlets 

G.COM.L.U.64 

Group compare less unsigned octlets 

G.COM.L.U.128 

Group compare less unsigned hexlet 

G.COM.NE.8

Group compare not equal bytes

G.COM.NE.16

Group compare not equal doublets

G.COM.NE.32

Group compare not equal quadlets

G.COM.NE.64

Group compare not equal octlets

G.COM.NE.128

Group compare not equal hexlet 


Fig. 35A 


Format 


G.COM.op.size rd,rc
G.COM.opz.size rcd

gcomopsize(rd,rc)

31 24 23 18 17 12 11 6 5 0

| G.size | rd | rc | op | GCOM |

8 6 6 6 6 


Fig. 35B 


Definition 

def GroupCompare(op,size,rd,rc) 
d <- RegRead(rd, 128) 
c <r- RegRead(rc, 128) 
case op of 

G.COM.E: 

for i <- 0 to 128-size by size

a_{i+size-1..i} <- (d_{i+size-1..i} = c_{i+size-1..i})^{size}
endfor
G.COM.NE:

for i <- 0 to 128-size by size

a_{i+size-1..i} <- (d_{i+size-1..i} ≠ c_{i+size-1..i})^{size}
endfor
G.COM.AND.E:

for i <- 0 to 128-size by size

a_{i+size-1..i} <- ((c_{i+size-1..i} and d_{i+size-1..i}) = 0)^{size}
endfor
G.COM.AND.NE:

for i <- 0 to 128-size by size

a_{i+size-1..i} <- ((c_{i+size-1..i} and d_{i+size-1..i}) ≠ 0)^{size}
endfor
G.COM.L:

for i <- 0 to 128-size by size

a_{i+size-1..i} <- ((rd = rc) ? (c_{i+size-1..i} < 0) : (d_{i+size-1..i} < c_{i+size-1..i}))^{size}
endfor
G.COM.GE:

for i <- 0 to 128-size by size

a_{i+size-1..i} <- ((rd = rc) ? (c_{i+size-1..i} ≥ 0) : (d_{i+size-1..i} ≥ c_{i+size-1..i}))^{size}
endfor
G.COM.L.U:

for i <- 0 to 128-size by size

a_{i+size-1..i} <- ((rd = rc) ? (c_{i+size-1..i} > 0) :
((0 || d_{i+size-1..i}) < (0 || c_{i+size-1..i})))^{size}

endfor
G.COM.GE.U:

for i <- 0 to 128-size by size

a_{i+size-1..i} <- ((rd = rc) ? (c_{i+size-1..i} ≤ 0) :
((0 || d_{i+size-1..i}) ≥ (0 || c_{i+size-1..i})))^{size}

endfor

endcase
if (a ≠ 0) then

raise FixedPointArithmetic 

endif 
enddef 


Exceptions 

Fixed-point arithmetic 


Fig. 35C 


E.LOG.MOST.8

Ensemble log of most significant bit signed bytes

E.LOG.MOST.16

Ensemble log of most significant bit signed doublets 

E.LOG.MOST.32 

Ensemble log of most significant bit signed quadlets 

E.LOG.MOST.64

Ensemble log of most significant bit signed octlets 

E.LOG.MOST.128 

Ensemble log of most significant bit signed hexlet 

E.LOG.MOST.U.8

Ensemble log of most significant bit unsigned bytes

E.LOG.MOST.U.16

Ensemble log of most significant bit unsigned doublets 

E.LOG.MOST.U.32 

Ensemble log of most significant bit unsigned quadlets 

E.LOG.MOST.U.64 

Ensemble log of most significant bit unsigned octlets 

E.LOG.MOST.U.128 

Ensemble log of most significant bit unsigned hexlet f 

E.SUM.8 

Ensemble sum signed bytes j 

E.SUM.16 

Ensemble sum signed doublets 

E.SUM.32

Ensemble sum signed quadlets

E.SUM.64 

Ensemble sum signed octlets 

E.SUM.U.1 

Ensemble sum unsigned bits 

E.SUM.U.8 

Ensemble sum unsigned bytes 

E.SUM.U.16 

Ensemble sum unsigned doublets 

E.SUM.U.32 

Ensemble sum unsigned quadlets J 

E.SUM.U.64 

Ensemble sum unsigned octlets 


Selection 


class 

op 

size 

sum 

SUM 

8 16 32 64 

SUM.U 

1 8 16 32 64 

log most 
significant bit 

LOG.MOST LOG.MOST.U 

8 16 32 64 128 


Fig. 36A 


Format 

E.op.size rd=rc 
rd=eopsize(rc) 

31 24 23 18 17 12 11 6 5 0

| E.size | rd | rc | op | E.UNARY |

8 6 6 6 6 


Fig. 36B 


Definition 


def EnsembleUnary(op,size,rd,rc) as
  c <- RegRead(rc, 128)
  case op of
    E.LOG.MOST:
      for i <- 0 to 128-size by size
        if (c_{i+size-1..i} = 0) then
          a_{i+size-1..i} <- -1
        else
          for j <- 0 to size-1
            if c_{size-1+i..j+i} = (c_{size-1+i}^{size-1-j} || not c_{size-1+i}) then
              a_{i+size-1..i} <- j
            endif
          endfor
        endif
      endfor
    E.LOG.MOSTU:
      for i <- 0 to 128-size by size
        if (c_{i+size-1..i} = 0) then
          a_{i+size-1..i} <- -1
        else
          for j <- 0 to size-1
            if c_{size-1+i..j+i} = (0^{size-1-j} || 1) then
              a_{i+size-1..i} <- j
            endif
          endfor
        endif
      endfor
    E.SUM:
      p[0] <- 0^{128}
      for i <- 0 to 128-size by size
        p[i+size] <- p[i] + (c_{size-1+i}^{128-size} || c_{size-1+i..i})
      endfor
      a <- p[128]
    E.SUMU:
      p[0] <- 0^{128}
      for i <- 0 to 128-size by size
        p[i+size] <- p[i] + (0^{128-size} || c_{size-1+i..i})
      endfor
      a <- p[128]
  endcase
  RegWrite(rd, 128, a)
enddef

Exceptions 

none 


Fig. 36C 


Floating-point function Definitions 

def eb <- ebits(prec) as
  case prec of
    16:
      eb <- 5
    32:
      eb <- 8
    64:
      eb <- 11
    128:
      eb <- 15
  endcase
enddef

def eb <- ebias(prec) as
  eb <- 0 || 1^{ebits(prec)-1}
enddef

def fb <- fbits(prec) as
  fb <- prec - 1 - eb
enddef

def a <- F(prec, ai) as
  a.s <- ai_{prec-1}
  ae <- ai_{prec-2..fbits(prec)}
  af <- ai_{fbits(prec)-1..0}
  if ae = 1^{ebits(prec)} then
    if af = 0 then
      a.t <- INFINITY
    elseif af_{fbits(prec)-1} then
      a.t <- SNaN
      a.e <- -fbits(prec)
      a.f <- 1 || af_{fbits(prec)-2..0}
    else
      a.t <- QNaN
      a.e <- -fbits(prec)
      a.f <- af
    endif

Fig. 37 


  elseif ae = 0 then
    if af = 0 then
      a.t <- ZERO
    else
      a.t <- NORM
      a.e <- 1-ebias(prec)-fbits(prec)
      a.f <- 0 || af
    endif
  else
    a.t <- NORM
    a.e <- ae-ebias(prec)-fbits(prec)
    a.f <- 1 || af
  endif
enddef

def a <- DEFAULTQNAN as
  a.s <- 0
  a.t <- QNAN
  a.e <- -1
  a.f <- 1
enddef

def a <- DEFAULTSNAN as
  a.s <- 0
  a.t <- SNAN
  a.e <- -1
  a.f <- 1
enddef

def fadd(a,b) as faddr(a,b,N) enddef

def c <- faddr(a,b,round) as
  if a.t=NORM and b.t=NORM then
    // d,e are a,b with exponent aligned and fraction adjusted
    if a.e > b.e then
      d <- a
      e.t <- b.t
      e.s <- b.s
      e.e <- a.e
      e.f <- b.f || 0^{a.e-b.e}
    else if a.e < b.e then
      d.t <- a.t
      d.s <- a.s
      d.e <- b.e
      d.f <- a.f || 0^{b.e-a.e}
      e <- b
    endif
    c.t <- d.t
    c.e <- d.e
    if d.s = e.s then
      c.s <- d.s
      c.f <- d.f + e.f
    elseif d.f > e.f then
      c.s <- d.s
      c.f <- d.f - e.f


Fig. 37 (cont'd) 


    elseif d.f < e.f then
      c.s <- e.s
      c.f <- e.f - d.f
    else
      c.s <- r=F
      c.t <- ZERO
    endif
  // priority is given to b operand for NaN propagation
  elseif (b.t=SNAN) or (b.t=QNAN) then
    c <- b
  elseif (a.t=SNAN) or (a.t=QNAN) then
    c <- a
  elseif a.t=ZERO and b.t=ZERO then
    c.t <- ZERO
    c.s <- (a.s and b.s) or (round=F and (a.s or b.s))
  // NULL values are like zero, but do not combine with ZERO to alter sign
  elseif a.t=ZERO or a.t=NULL then
    c <- b
  elseif b.t=ZERO or b.t=NULL then
    c <- a
  elseif a.t=INFINITY and b.t=INFINITY then
    if a.s ≠ b.s then
      c <- DEFAULTSNAN // Invalid
    else
      c <- a
    endif
  elseif a.t=INFINITY then
    c <- a
  elseif b.t=INFINITY then
    c <- b
  else
    assert FALSE // should have covered all the cases above
  endif
enddef


def b <- fneg(a) as
  b.s <- ~a.s
  b.t <- a.t
  b.e <- a.e
  b.f <- a.f
enddef


def fsub(a,b) as fsubr(a,b,N) enddef

def fsubr(a,b,round) as faddr(a,fneg(b),round) enddef

def frsub(a,b) as frsubr(a,b,N) enddef

def frsubr(a,b,round) as faddr(fneg(a),b,round) enddef

def c <- fcom(a,b) as
  if (a.t=SNAN) or (a.t=QNAN) or (b.t=SNAN) or (b.t=QNAN) then
    c <- U
  elseif a.t=INFINITY and b.t=INFINITY then
    if a.s ≠ b.s then
      c <- (a.s=0) ? G : L


Fig. 37 (cont'd)


    else
      c <- E
    endif
  elseif a.t=INFINITY then
    c <- (a.s=0) ? G : L
  elseif b.t=INFINITY then
    c <- (b.s=0) ? G : L
  elseif a.t=NORM and b.t=NORM then
    if a.s ≠ b.s then
      c <- (a.s=0) ? G : L
    else
      if a.e > b.e then
        af <- a.f
        bf <- b.f || 0^{a.e-b.e}
      else
        af <- a.f || 0^{b.e-a.e}
        bf <- b.f
      endif
      if af = bf then
        c <- E
      else
        c <- ((a.s=0) ^ (af > bf)) ? G : L
      endif
    endif
  elseif a.t=NORM then
    c <- (a.s=0) ? G : L
  elseif b.t=NORM then
    c <- (b.s=0) ? G : L
  elseif a.t=ZERO and b.t=ZERO then
    c <- E
  else
    assert FALSE // should have covered all the cases above
  endif
enddef

def c <- fmul(a,b) as
  if a.t=NORM and b.t=NORM then
    c.s <- a.s ^ b.s
    c.t <- NORM
    c.e <- a.e + b.e
    c.f <- a.f * b.f
  // priority is given to b operand for NaN propagation
  elseif (b.t=SNAN) or (b.t=QNAN) then
    c.s <- a.s ^ b.s
    c.t <- b.t
    c.e <- b.e
    c.f <- b.f
  elseif (a.t=SNAN) or (a.t=QNAN) then
    c.s <- a.s ^ b.s
    c.t <- a.t
    c.e <- a.e
    c.f <- a.f
  elseif a.t=ZERO and b.t=INFINITY then
    c <- DEFAULTSNAN // Invalid
  elseif a.t=INFINITY and b.t=ZERO then
    c <- DEFAULTSNAN // Invalid


Fig. 37 (cont'd) 


  elseif a.t=ZERO or b.t=ZERO then
    c.s <- a.s ^ b.s
    c.t <- ZERO
  else
    assert FALSE // should have covered all the cases above
  endif
enddef

def c <- fdivr(a,b) as
  if a.t=NORM and b.t=NORM then
    c.s <- a.s ^ b.s
    c.t <- NORM
    c.e <- a.e - b.e + 256
    c.f <- (a.f || 0^{256}) / b.f
  // priority is given to b operand for NaN propagation
  elseif (b.t=SNAN) or (b.t=QNAN) then
    c.s <- a.s ^ b.s
    c.t <- b.t
    c.e <- b.e
    c.f <- b.f
  elseif (a.t=SNAN) or (a.t=QNAN) then
    c.s <- a.s ^ b.s
    c.t <- a.t
    c.e <- a.e
    c.f <- a.f
  elseif a.t=ZERO and b.t=ZERO then
    c <- DEFAULTSNAN // Invalid
  elseif a.t=INFINITY and b.t=INFINITY then
    c <- DEFAULTSNAN // Invalid
  elseif a.t=ZERO then
    c.s <- a.s ^ b.s
    c.t <- ZERO
  elseif a.t=INFINITY then
    c.s <- a.s ^ b.s
    c.t <- INFINITY
  else
    assert FALSE // should have covered all the cases above
  endif
enddef

def msb <- findmsb(a) as
  MAXF <- 2^{18} // Largest possible f value after matrix multiply
  for j <- 0 to MAXF
    if a_{MAXF-1..j} = (0^{MAXF-1-j} || 1) then
      msb <- j
    endif
  endfor
enddef

def ai <- PackF(prec,a,round) as
  case a.t of
    NORM:
      msb <- findmsb(a.f)
      rn <- msb-1-fbits(prec) // lsb for normal
      rdn <- -ebias(prec)-a.e-1-fbits(prec) // lsb if a denormal
      rb <- (rn > rdn) ? rn : rdn


Fig. 37 (cont'd) 


      if rb < 0 then
        aifr <- a.f_{msb-1..0} || 0^{-rb}
        eadj <- 0
      else
        case round of
          C:
            s <- 0^{msb-rb} || (~a.s)^{rb}
          F:
            s <- 0^{msb-rb} || (a.s)^{rb}
          N, NONE:
            s <- 0^{msb-rb} || ~a.f_{rb} || a.f_{rb}^{rb-1}
          X:
            if a.f_{rb-1..0} ≠ 0 then
              raise FloatingPointArithmetic // Inexact
            endif
            s <- 0
          Z:
            s <- 0
        endcase
        v <- (0 || a.f_{msb..0}) + (0 || s)
        if v_{msb} = 1 then
          aifr <- v_{msb-1..rb}
          eadj <- 0
        else
          aifr <- 0^{fbits(prec)}
          eadj <- 1
        endif
      endif
      aien <- a.e + msb - 1 + eadj + ebias(prec)
      if aien ≤ 0 then
        if round = NONE then
          ai <- a.s || 0^{ebits(prec)} || aifr
        else
          raise FloatingPointArithmetic //Underflow
        endif
      elseif aien > 1^{ebits(prec)} then
        if round = NONE then
          //default: round-to-nearest overflow handling
          ai <- a.s || 1^{ebits(prec)} || 0^{fbits(prec)}
        else
          raise FloatingPointArithmetic //Underflow
        endif
      else
        ai <- a.s || aien_{ebits(prec)-1..0} || aifr
      endif
    SNAN:
      if round ≠ NONE then
        raise FloatingPointArithmetic //Invalid
      endif
      if -a.e < fbits(prec) then
        ai <- a.s || 1^{ebits(prec)} || a.f_{-a.e-1..0} || 0^{fbits(prec)+a.e}

Fig. 37 (cont'd) 


      else
        lsb <- a.f_{-a.e-1-fbits(prec)+1..0} ≠ 0
        ai <- a.s || 1^{ebits(prec)} || a.f_{-a.e-1..-a.e-1-fbits(prec)+2} || lsb
      endif
    QNAN:
      if -a.e < fbits(prec) then
        ai <- a.s || 1^{ebits(prec)} || a.f_{-a.e-1..0} || 0^{fbits(prec)+a.e}
      else
        lsb <- a.f_{-a.e-1-fbits(prec)+1..0} ≠ 0
        ai <- a.s || 1^{ebits(prec)} || a.f_{-a.e-1..-a.e-1-fbits(prec)+2} || lsb
      endif
    ZERO:
      ai <- a.s || 0^{ebits(prec)} || 0^{fbits(prec)}
    INFINITY:
      ai <- a.s || 1^{ebits(prec)} || 0^{fbits(prec)}
  endcase
enddef

def ai <- fsinkr(prec, a, round) as
  case a.t of
    NORM:
      msb <- findmsb(a.f)
      rb <- -a.e
      if rb < 0 then
        aifr <- a.f_{msb..0} || 0^{-rb}
        aims <- msb - rb
      else
        case round of
          C, C.D:
            s <- 0^{msb-rb} || (~ai.s)^{rb}
          F, F.D:
            s <- 0^{msb-rb} || (ai.s)^{rb}
          N, NONE:
            s <- 0^{msb-rb} || ~ai.f_{rb} || ai.f_{rb}^{rb-1}
          X:
            if ai.f_{rb-1..0} ≠ 0 then
              raise FloatingPointArithmetic // Inexact
            endif
            s <- 0
          Z, Z.D:
            s <- 0
        endcase
        v <- (0 || a.f_{msb..0}) + (0 || s)
        if v_{msb} = 1 then
          aims <- msb + 1 - rb
        else
          aims <- msb - rb
        endif
        aifr <- v_{aims..rb}
      endif
      if aims > prec then
        case round of
          C.D, F.D, NONE, Z.D:
            ai <- a.s || (~a.s)^{prec-1}

Fig. 37 (cont'd) 


          C, F, N, X, Z:
            raise FloatingPointArithmetic // Overflow
        endcase
      elseif a.s = 0 then
        ai <- aifr
      else
        ai <- -aifr
      endif
    ZERO:
      ai <- 0^{prec}
    SNAN, QNAN:
      case round of
        C.D, F.D, NONE, Z.D:
          ai <- 0^{prec}
        C, F, N, X, Z:
          raise FloatingPointArithmetic // Invalid
      endcase
    INFINITY:
      case round of
        C.D, F.D, NONE, Z.D:
          ai <- a.s || (~a.s)^{prec-1}
        C, F, N, X, Z:
          raise FloatingPointArithmetic // Invalid
      endcase
  endcase
enddef


def c <- frecrest(a) as
  b.s <- 0
  b.t <- NORM
  b.e <- 0
  b.f <- 1
  c <- fest(fdiv(b,a))
enddef


def c <- frsqrest(a) as
  b.s <- 0
  b.t <- NORM
  b.e <- 0
  b.f <- 1
  c <- fest(fsqr(fdiv(b,a)))
enddef


def c <- fest(a) as
  if (a.t=NORM) then
    msb <- findmsb(a.f)
    a.e <- a.e + msb - 13
    a.f <- a.f_{msb..msb-12} || 1
  else
    c <- a
  endif
enddef


def c <- fsqr(a) as
  if (a.t=NORM) and (a.s=0) then
    c.s <- 0
    c.t <- NORM
    if (a.e_{0} = 1) then

Fig. 37 (cont'd) 


      c.e <- (a.e-127) / 2
      c.f <- sqr(a.f || 0^{127})
    else
      c.e <- (a.e-128) / 2
      c.f <- sqr(a.f || 0^{128})
    endif
  elseif (a.t=SNAN) or (a.t=QNAN) or a.t=ZERO or ((a.t=INFINITY) and (a.s=0)) then
    c <- a
  elseif ((a.t=NORM) or (a.t=INFINITY)) and (a.s=1) then
    c <- DEFAULTSNAN // Invalid
  else
    assert FALSE // should have covered all the cases above
  endif
enddef

Fig. 37 (cont'd) 


E.ADD.F.16 

Ensemble add floating-point half 

E.ADD.F.16.C 

Ensemble add floating-point half ceiling 

E.ADD.F.16.F 

Ensemble add floating-point half floor 

E.ADD.F.16.N 

Ensemble add floating-point half nearest 

E.ADD.F.16.X 

Ensemble add floating-point half exact 

E.ADD.F.16.Z 

Ensemble add floating-point half zero 

E.ADD.F.32 

Ensemble add floating-point single 

E.ADD.F.32.C 

Ensemble add floating-point single ceiling 

E.ADD.F.32.F 

Ensemble add floating-point single floor 

E.ADD.F.32.N 

Ensemble add floating-point single nearest 

E.ADD.F.32.X 

Ensemble add floating-point single exact 

E.ADD.F.32.Z 

Ensemble add floating-point single zero 

E.ADD.F.64 

Ensemble add floating-point double 

E.ADD.F.64.C 

Ensemble add floating-point double ceiling 

E.ADD.F.64.F 

Ensemble add floating-point double floor 

E.ADD.F.64.N 

Ensemble add floating-point double nearest 

E.ADD.F.64.X 

Ensemble add floating-point double exact 

E.ADD.F.64.Z 

Ensemble add floating-point double zero 

E.ADD.F.128 

Ensemble add floating-point quad 

E.ADD.F.128.C 

Ensemble add floating-point quad ceiling 

E.ADD.F.128.F 

Ensemble add floating-point quad floor 

E.ADD.F.128.N 

Ensemble add floating-point quad nearest 

E.ADD.F.128.X 

Ensemble add floating-point quad exact 

E.ADD.F.128.Z 

Ensemble add floating-point quad zero 

E.DIV.F.16 

Ensemble divide floating-point half 

E.DIV.F.16.C 

Ensemble divide floating-point half ceiling 

E.DIV.F.16.F 

Ensemble divide floating-point half floor

E.DIV.F.16.N 

Ensemble divide floating-point half nearest 

E.DIV.F.16.X 

Ensemble divide floating-point half exact 

E.DIV.F.16.Z 

Ensemble divide floating-point half zero 

E.DIV.F.32 

Ensemble divide floating-point single 

E.DIV.F.32.C 

Ensemble divide floating-point single ceiling 

E.DIV.F.32.F 

Ensemble divide floating-point single floor 

E.DIV.F.32.N 

Ensemble divide floating-point single nearest 

E.DIV.F.32.X 

Ensemble divide floating-point single exact 

E.DIV.F.32.Z 

Ensemble divide floating-point single zero

E.DIV.F.64

Ensemble divide floating-point double
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E.DIV.F.64.C 

Ensemble divide floating-point double ceiling 

E.DIV.F.64.F 

Ensemble divide floating-point double floor 

E.DIV.F.64.N 

Ensemble divide floating-point double nearest 

E.DIV.F.64.X 

Ensemble divide floating-point double exact 

E.DIV.F.64.Z 

Ensemble divide floating-point double zero 

E.DIV.F.128 

Ensemble divide floating-point quad 

E.DIV.F.128.C 

Ensemble divide floating-point quad ceiling 

E.DIV.F.128.F 

Ensemble divide floating-point quad floor 

E.DIV.F.128.N 

Ensemble divide floating-point quad nearest 

E.DIV.F.128.X 

Ensemble divide floating-point quad exact 

E.DIV.F.128.Z 

Ensemble divide floating-point quad zero 

E.MUL.C.F.16

Ensemble multiply complex floating-point half 

E.MUL.C.F.32 

Ensemble multiply complex floating-point single 

E.MUL.C.F.64 

Ensemble multiply complex floating-point double 

E.MUL.F.16 

Ensemble multiply floating-point half 

E.MUL.F.16.C

Ensemble multiply floating-point half ceiling

E.MUL.F.16.F

Ensemble multiply floating-point half floor

E.MUL.F.16.N

Ensemble multiply floating-point half nearest

E.MUL.F.16.X

Ensemble multiply floating-point half exact

E.MUL.F.16.Z

Ensemble multiply floating-point half zero

E.MUL.F.32

Ensemble multiply floating-point single 

E.MUL.F.32.C 

Ensemble multiply floating-point single ceiling 

E.MUL.F.32.F 

Ensemble multiply floating-point single floor 

E.MUL.F.32.N

Ensemble multiply floating-point single nearest

E.MUL.F.32.X

Ensemble multiply floating-point single exact

E.MUL.F.32.Z

Ensemble multiply floating-point single zero

E.MUL.F.64

Ensemble multiply floating-point double

E.MUL.F.64.C

Ensemble multiply floating-point double ceiling

E.MUL.F.64.F

Ensemble multiply floating-point double floor 

E.MUL.F.64.N 

Ensemble multiply floating-point double nearest 

E.MUL.F.64.X 

Ensemble multiply floating-point double exact 

E.MUL.F.64.Z 

Ensemble multiply floating-point double zero 

E.MUL.F.128 

Ensemble multiply floating-point quad 

E.MUL.F.128.C

Ensemble multiply floating-point quad ceiling

E.MUL.F.128.F

Ensemble multiply floating-point quad floor

E.MUL.F.128.N

Ensemble multiply floating-point quad nearest

E.MUL.F.128.X

Ensemble multiply floating-point quad exact 

E.MUL.F.128.Z 

Ensemble multiply floating-point quad zero 
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Selection 


class 

op 

prec 

round/trap 

add 

EADDF 

16 32 64 128 

NONE C F N X Z 

divide 

EDIVF 

16 32 64 128 

NONE C F N X Z 

multiply 

EMULF 

16 32 64 128 

NONE C F N X Z 

complex multiply 

EMULC 
F 

16 32 64 

NONE 


Format 

E.op.prec.round rd=rc,rb
rd=eopprecround(rc,rb)

31 24 23 18 17 12 11 6 5 0

| E.prec | rd | rc | rb | op.round |

8 6 6 6 6


Fig. 38B 


Definition 

def mul(size,v,i,w,j) as
  mul <- fmul(F(size,v_{size-1+i..i}),F(size,w_{size-1+j..j}))
enddef

def EnsembleFloatingPoint(op,prec,round,ra,rb,rc) as
  c <- RegRead(rc, 128)
  b <- RegRead(rb, 128)
  for i <- 0 to 128-prec by prec
    ci <- F(prec,c_{i+prec-1..i})
    bi <- F(prec,b_{i+prec-1..i})
    case op of
      E.ADD.F:
        ai <- faddr(ci,bi,round)
      E.MUL.F:
        ai <- fmul(ci,bi)
      E.MUL.C.F:
        if (i and prec) then
          ai <- fadd(mul(prec,c,i,b,i-prec), mul(prec,c,i-prec,b,i))
        else
          ai <- fsub(mul(prec,c,i,b,i), mul(prec,c,i+prec,b,i+prec))
        endif
      E.DIV.F:
        ai <- fdiv(ci,bi)
    endcase
    a_{i+prec-1..i} <- PackF(prec, ai, round)
  endfor
  RegWrite(rd, 128, a)
enddef


Exceptions 

Floating-point arithmetic 


Fig. 38C 


Operation codes 


E.MUL.ADD.C.F.16 

Ensemble multiply add complex floating-point half 

E.MUL.ADD.C.F.32 

Ensemble multiply add complex floating-point single 

E.MUL.ADD.C.F.64

Ensemble multiply add complex floating-point double 

E.MUL.ADD.F.16 

Ensemble multiply add floating-point half 

E.MUL.ADD.F.16.C 

Ensemble multiply add floating-point half ceiling 

E.MUL.ADD.F.16.F

Ensemble multiply add floating-point half floor

E.MUL.ADD.F.16.N

Ensemble multiply add floating-point half nearest

E.MUL.ADD.F.16.X

Ensemble multiply add floating-point half exact 

E.MUL.ADD.F.16.Z 

Ensemble multiply add floating-point half zero 

E.MUL.ADD.F.32 

Ensemble multiply add floating-point single 

E.MUL.ADD.F.32.C 

Ensemble multiply add floating-point single ceiling

E.MUL.ADD.F.32.F

Ensemble multiply add floating-point single floor 

E.MUL.ADD.F.32.N 

Ensemble multiply add floating-point single nearest 

E.MUL.ADD.F.32.X 

Ensemble multiply add floating-point single exact 

E.MUL.ADD.F.32.Z

Ensemble multiply add floating-point single zero 

E.MUL.ADD.F.64 

Ensemble multiply add floating-point double 

E.MUL.ADD.F.64.C 

Ensemble multiply add floating-point double ceiling 

E.MUL.ADD.F.64.F 

Ensemble multiply add floating-point double floor

E.MUL.ADD.F.64.N

Ensemble multiply add floating-point double nearest

E.MUL.ADD.F.64.X

Ensemble multiply add floating-point double exact 

E.MUL.ADD.F.64.Z 

Ensemble multiply add floating-point double zero 

E.MUL.ADD.F.128 

Ensemble multiply add floating-point quad 

E.MUL.ADD.F.128.C 

Ensemble multiply add floating-point quad ceiling 

E.MUL.ADD.F.128.F

Ensemble multiply add floating-point quad floor

E.MUL.ADD.F.128.N

Ensemble multiply add floating-point quad nearest

E.MUL.ADD.F.128.X

Ensemble multiply add floating-point quad exact

E.MUL.ADD.F.128.Z

Ensemble multiply add floating-point quad zero 

E.MUL.SUB.C.F.16 

Ensemble multiply subtract complex floating-point half 

E.MUL.SUB.C.F.32

Ensemble multiply subtract complex floating-point single

E.MUL.SUB.C.F.64

Ensemble multiply subtract complex floating-point double

E.MUL.SUB.F.16

Ensemble multiply subtract floating-point half

E.MUL.SUB.F.32

Ensemble multiply subtract floating-point single

E.MUL.SUB.F.64

Ensemble multiply subtract floating-point double

E.MUL.SUB.F.128

Ensemble multiply subtract floating-point quad 


Fig. 38D 


Selection 


class 

op 

type 

prec 

round/trap ! 

multiply add 

E.MULAD 
D 

F 

16 32 64 128 

NONE C F N X Z 

C.F 

16 32 64 

NONE 

multiply subtract 

E.MULSU 
B 

F 

16 32 64 128 

NONE 

C.F 

16 32 64 

NONE 


Format 


E.op.size rd@rc,rb
rd=eopsize(rd,rc,rb)

31 24 23 18 17 12 11 6 5 0

| E.size | rd | rc | rb | op |

8 6 6 6 6


Fig. 38E 


Definition 

def mul(size,v,i,w,j) as
  mul <- fmul(F(size,v_{size-1+i..i}),F(size,w_{size-1+j..j}))
enddef

def EnsembleInplaceFloatingPoint(op,size,rd,rc,rb) as
  d <- RegRead(rd, 128)
  c <- RegRead(rc, 128)
  b <- RegRead(rb, 128)
  for i <- 0 to 128-size by size
    di <- F(prec,d_{i+prec-1..i})
    case op of
      E.MUL.ADD.F:
        ai <- fadd(di, mul(prec,c,i,b,i))
      E.MUL.ADD.C.F:
        if (i and prec) then
          ai <- fadd(di, fadd(mul(prec,c,i,b,i-prec), mul(c,i-prec,b,i)))
        else
          ai <- fadd(di, fsub(mul(prec,c,i,b,i), mul(prec,c,i+prec,b,i+prec)))
        endif
      E.MUL.SUB.F:
        ai <- frsub(di, mul(prec,c,i,b,i))
      E.MUL.SUB.C.F:
        if (i and prec) then
          ai <- frsub(di, fadd(mul(prec,c,i,b,i-prec), mul(c,i-prec,b,i)))
        else
          ai <- frsub(di, fsub(mul(prec,c,i,b,i), mul(prec,c,i+prec,b,i+prec)))
        endif
    endcase
    a_{i+prec-1..i} <- PackF(prec, ai, round)
  endfor
  RegWrite(rd, 128, a)
enddef

Exceptions 

none 

Fig. 38F 


Operation codes 


E.SCAL.ADD.F.16 

Ensemble scale add floating-point half 

E.SCAL.ADD.F.32 

Ensemble scale add floating-point single 

E.SCAL.ADD.F.64 

Ensemble scale add floating-point double 


Fig. 38G 


Selection 


class 

op 

prec 

scale add 

E.SCALADD.F 

16 32 64 


Format 


E.SCAL.ADD.F.size ra=rd,rc,rb
ra=escaladdfsize(rd,rc,rb)

31 24 23 18 17 12 11 6 5 0

| op | rd | rc | rb | ra |

8 6 6 6 6

Fig. 38H 


Definition 

def EnsembleFloatingPointTernary(op,prec,rd,rc,rb,ra) as
  d <- RegRead(rd, 128)
  c <- RegRead(rc, 128)
  b <- RegRead(rb, 128)
  for i <- 0 to 128-prec by prec
    di <- F(prec,d_{i+prec-1..i})
    ci <- F(prec,c_{i+prec-1..i})
    ai <- fadd(fmul(di, F(prec,b_{prec-1..0})), fmul(ci, F(prec,b_{2*prec-1..prec})))
    a_{i+prec-1..i} <- PackF(prec, ai, none)
  endfor
  RegWrite(ra, 128, a)
enddef

Exceptions 

none 

Fig. 38I


E.SUB.F.16 

Ensemble subtract floating-point half 

E.SUB.F.16.C 

Ensemble subtract floating-point half ceiling 

E.SUB.F.16.F 

Ensemble subtract floating-point half floor 

E.SUB.F.16.N 

Ensemble subtract floating-point half nearest 

E.SUB.F.16.Z 

Ensemble subtract floating-point half zero 

E.SUB.F.16.X 

Ensemble subtract floating-point half exact 

E.SUB.F.32 

Ensemble subtract floating-point single 

E.SUB.F.32.C 

Ensemble subtract floating-point single ceiling 

E.SUB.F.32.F 

Ensemble subtract floating-point single floor 

E.SUB.F.32.N 

Ensemble subtract floating-point single nearest 

E.SUB.F.32.Z 

Ensemble subtract floating-point single zero 

E.SUB.F.32.X 

Ensemble subtract floating-point single exact 

E.SUB.F.64 

Ensemble subtract floating-point double 

E.SUB.F.64.C 

Ensemble subtract floating-point double ceiling 

E.SUB.F.64.F 

Ensemble subtract floating-point double floor 

E.SUB.F.64.N 

Ensemble subtract floating-point double nearest 

E.SUB.F.64.Z 

Ensemble subtract floating-point double zero 

E.SUB.F.64.X 

Ensemble subtract floating-point double exact 

E.SUB.F.128 

Ensemble subtract floating-point quad 

E.SUB.F.128.C 

Ensemble subtract floating-point quad ceiling 

E.SUB.F.128.F 

Ensemble subtract floating-point quad floor 

E.SUB.F.128.N 

Ensemble subtract floating-point quad nearest 

E.SUB.F.128.Z 

Ensemble subtract floating-point quad zero 

E.SUB.F.128.X 

Ensemble subtract floating-point quad exact 


Fig. 39A 


Selection 


class 

op 

prec 

round/trap 

set 

SET. 
E LG 
L GE 

16 32 64 128 

NONE X 

subtract 

SUB 

16 32 64 128 

NONE C F N X Z 


Format 


E.op.prec.round rd=rb,rc 


rd=eopprecround(rb,rc) 

31 24 23 18 17 12 11 6 5 0

| E.prec | rd | rc | rb | op.round |


Fig. 39B 


Definition 

def EnsembleReversedFloatingPoint(op,prec,round,rd,rc,rb) as
  c <- RegRead(rc, 128)
  b <- RegRead(rb, 128)
  for i <- 0 to 128-prec by prec
    ci <- F(prec,c_{i+prec-1..i})
    bi <- F(prec,b_{i+prec-1..i})
    ai <- frsubr(ci,-bi, round)
    a_{i+prec-1..i} <- PackF(prec, ai, round)
  endfor
  RegWrite(rd, 128, a)
enddef


Exceptions 

Floating-point arithmetic 


Fig. 39C 


Operation codes 


G.SET.E.F.16

Group set equal floating-point half 

G.SET.E.F.16.X 

Group set equal floating-point half exact 

G.SET.E.F.32 

Group set equal floating-point single 

G.SET.E.F.32.X 

Group set equal floating-point single exact 

G.SET.E.F.64 

Group set equal floating-point double 

G.SET.E.F.64.X 

Group set equal floating-point double exact 

G.SET.E.F.128 

Group set equal floating-point quad 

G.SET.E.F.128.X 

Group set equal floating-point quad exact 

G.SET.GE.F.16.X 

Group set greater equal floating-point half exact 

G.SET.GE.F.32.X 

Group set greater equal floating-point single exact 

G.SET.GE.F.64.X 

Group set greater equal floating-point double exact 

G.SET.GE.F.128.X 

Group set greater equal floating-point quad exact 

G.SET.LG.F.16 

Group set less greater floating-point half 

G.SET.LG.F.16.X 

Group set less greater floating-point half exact 

G.SET.LG.F.32 

Group set less greater floating-point single 

G.SET.LG.F.32.X 

Group set less greater floating-point single exact 

G.SET.LG.F.64 

Group set less greater floating-point double 

G.SET.LG.F.64.X 

Group set less greater floating-point double exact 

G.SET.LG.F.128 

Group set less greater floating-point quad 

G.SET.LG.F.128.X 

Group set less greater floating-point quad exact 

G.SET.L.F.16

Group set less floating-point half 

G.SET.L.F.16.X 

Group set less floating-point half exact 

G.SET.L.F.32 

Group set less floating-point single 

G.SET.L.F.32.X 

Group set less floating-point single exact 

G.SET.L.F.64

Group set less floating-point double

G.SET.L.F.64.X

Group set less floating-point double exact

G.SET.L.F.128

Group set less floating-point quad 

G.SET.L.F.128.X 

Group set less floating-point quad exact 

G.SET.GE.F.16 

Group set greater equal floating-point half 

G.SET.GE.F.32 

Group set greater equal floating-point single 

G.SET.GE.F.64 

Group set greater equal floating-point double 

G.SET.GE.F.128 

Group set greater equal floating-point quad 


Fig. 39D 


Equivalencies 


G.SET.LE.F.16.X 

Group set less equal floating-point half exact 

G.SET.LE.F.32.X 

Group set less equal floating-point single exact 

G.SET.LE.F.64.X 

Group set less equal floating-point double exact 

G.SET.LE.F.128.X 

Group set less equal floating-point quad exact 

G.SET.G.F.16 

Group set greater floating-point half 

G.SET.G.F.16.X 

Group set greater floating-point half exact 

G.SET.G.F.32 

Group set greater floating-point single 

G.SET.G.F.32.X

Group set greater floating-point single exact 

G.SET.G.F.64 

Group set greater floating-point double 

G.SET.G.F.64.X 

Group set greater floating-point double exact 

G.SET.G.F.128 

Group set greater floating-point quad 

G.SET.G.F.128.X 

Group set greater floating-point quad exact 

G.SET.LE.F.16 

Group set less equal floating-point half 

G.SET.LE.F.32 

Group set less equal floating-point single 

G.SET.LE.F.64 

Group set less equal floating-point double 

G.SET.LE.F.128 

Group set less equal floating-point quad 


G.SET.G.F.prec rd=rb,rc  →  G.SET.L.F.prec rd=rc,rb

G.SET.G.F.prec.X rd=rb,rc  →  G.SET.L.F.prec.X rd=rc,rb

G.SET.LE.F.prec rd=rb,rc  →  G.SET.GE.F.prec rd=rc,rb

G.SET.LE.F.prec.X rd=rb,rc  →  G.SET.GE.F.prec.X rd=rc,rb


Fig. 39E 


Selection 


class 

op 

prec 

round/trap 

set 

SET. 

E LG 
L GE 
G LE 

16 32 64 128 

NONE X 


Format 


G.op.prec.round rd=rb,rc 
rc=gopprecround(rb,ra) 

31 24 23 18 17 12 11 6 5 0

| G.prec | rd | rc | rb | op.round |

8 6 6 6 6 


Fig 39F 


Definition 


def GroupFloatingPointReversed(op,prec,round,rd,rc,rb) as
  c <- RegRead(rc, 128)
  b <- RegRead(rb, 128)
  for i <- 0 to 128-prec by prec
    ci <- F(prec,c_{i+prec-1..i})
    bi <- F(prec,b_{i+prec-1..i})
    if round≠NONE then
      if (di.t = SNAN) or (ci.t = SNAN) then
        raise FloatingPointArithmetic
      endif
      case op of
        G.SET.L.F, G.SET.GE.F:
          if (di.t = QNAN) or (ci.t = QNAN) then
            raise FloatingPointArithmetic
          endif
        others: //nothing
      endcase
    endif
    case op of
      G.SET.L.F:
        ai <- bi?>ci
      G.SET.GE.F:
        ai <- bi!?<ci
      G.SET.E.F:
        ai <- bi=ci
      G.SET.LG.F:
        ai <- bi≠ci
    endcase
    a_{i+prec-1..i} <- ai^{prec}
  endfor
  RegWrite(rd, 128, a)
enddef

Exceptions 


Floating-point arithmetic 


Fig. 39G 


G.COM.E.F.16

Group compare equal floating-point half

G.COM.E.F.16.X

Group compare equal floating-point half exact 

G.COM.E.F.32 

Group compare equal floating-point single 

G.COM.E.F.32.X 

Group compare equal floating-point single exact 

G.COM.E.F.64 

Group compare equal floating-point double 

G.COM.E.F.64.X 

Group compare equal floating-point double exact 

G.COM.E.F.128 

Group compare equal floating-point quad 

G.COM.E.F.128.X 

Group compare equal floating-point quad exact 

G.COM.GE.F.16

Group compare greater or equal floating-point half

G.COM.GE.F.16.X

Group compare greater or equal floating-point half exact 

G.COM.GE.F.32 

Group compare greater or equal floating-point single 

G.COM.GE.F.32.X 

Group compare greater or equal floating-point single exact 

G.COM.GE.F.64 

Group compare greater or equal floating-point double 

G.COM.GE.F.64.X 

Group compare greater or equal floating-point double exact 

G.COM.GE.F.128 

Group compare greater or equal floating-point quad 

G.COM.GE.F.128.X 

Group compare greater or equal floating-point quad exact 

G.COM.L.F.16

Group compare less floating-point half

G.COM.L.F.16.X

Group compare less floating-point half exact

G.COM.L.F.32

Group compare less floating-point single 

G.COM.L.F.32.X 

Group compare less floating-point single exact 

G.COM.L.F.64

Group compare less floating-point double 

G.COM.L.F.64.X 

Group compare less floating-point double exact 

G.COM.L.F.128

Group compare less floating-point quad 

G.COM.L.F.128.X 

Group compare less floating-point quad exact 

G.COM.LG.F.16

Group compare less or greater floating-point half

G.COM.LG.F.16.X

Group compare less or greater floating-point half exact 

G.COM.LG.F.32 

Group compare less or greater floating-point single 

G.COM.LG.F.32.X 

Group compare less or greater floating-point single exact 

G.COM.LG.F.64 

Group compare less or greater floating-point double 

G.COM.LG.F.64.X 

Group compare less or greater floating-point double exact 

G.COM.LG.F.128 

Group compare less or greater floating-point quad 

G.COM.LG.F.128.X 

Group compare less or greater floating-point quad exact 


Fig. 40A 


Format 

G.COM.op.prec.round rd,rc
rc=gcomopprecround(rd,rc) 

31 24 23 18 17 12 11 6 5 0

| G.prec | rd | rc | op | GCOM |

8 6 6 6 6 


Fig. 40B 


Definition 

def GroupCompareFloatingPoint(op,prec,round,rd,rc) as 
d <- RegRead(rd, 128) 
c <- RegRead(rc, 128) 
for i <- 0 to 128-prec by prec 
di <- F(prec, d_{i+prec-1..i}) 
ci <- F(prec, c_{i+prec-1..i}) 
if round ≠ NONE then 

if (di.t = SNAN) or (ci.t = SNAN) then 
raise FloatingPointArithmetic 

endif 

case op of 

G.COM.L.F, G.COM.GE.F: 

if (di.t = QNAN) or (ci.t = QNAN) then 
raise FloatingPointArithmetic 

endif 
others: //nothing 
endcase 

endif 

case op of 

G.COM.L.F: 

ai <- di?>ci 
G.COM.GE.F: 

ai <- di!?<ci 
G.COM.E.F: 

ai <- di=ci 
G.COM.LG.F: 

ai <- di≠ci 

endcase 

a_{i+prec-1..i} <- ai 
endfor 

if (a ≠ 0) then 

raise FloatingPointArithmetic 

endif 
enddef 


Exceptions 

Floating-point arithmetic 


Fig. 40C 


E.ABS.F.16 

Ensemble absolute value floating-point half 

E.ABS.F.16.X 

Ensemble absolute value floating-point half exception 

E.ABS.F.32 

Ensemble absolute value floating-point single 

E.ABS.F.32.X 

Ensemble absolute value floating-point single exception 

E.ABS.F.64 

Ensemble absolute value floating-point double 

E.ABS.F.64.X 

Ensemble absolute value floating-point double exception 

E.ABS.F.128 

Ensemble absolute value floating-point quad 

E.ABS.F.128.X 

Ensemble absolute value floating-point quad exception 

E.COPY.F.16 

Ensemble copy floating-point half 

E.COPY.F.16.X 

Ensemble copy floating-point half exception 

E.COPY.F.32 

Ensemble copy floating-point single 

E.COPY.F.32.X 

Ensemble copy floating-point single exception 

E.COPY.F.64 

Ensemble copy floating-point double 

E.COPY.F.64.X 

Ensemble copy floating-point double exception 

E.COPY.F.128 

Ensemble copy floating-point quad 

E.COPY.F.128.X 

Ensemble copy floating-point quad exception 

E.DEFLATE.F.32 

Ensemble convert floating-point half from single 

E.DEFLATE.F.32.C 

Ensemble convert floating-point half from single ceiling 

E.DEFLATE.F.32.F 

Ensemble convert floating-point half from single floor 

E.DEFLATE.F.32.N 

Ensemble convert floating-point half from single nearest 

E.DEFLATE.F.32.X 

Ensemble convert floating-point half from single exact 

E.DEFLATE.F.32.Z 

Ensemble convert floating-point half from single zero 

E.DEFLATE.F.64 

Ensemble convert floating-point single from double 

E.DEFLATE.F.64.C 

Ensemble convert floating-point single from double ceiling 

E.DEFLATE.F.64.F 

Ensemble convert floating-point single from double floor 

E.DEFLATE.F.64.N 

Ensemble convert floating-point single from double nearest 

E.DEFLATE.F.64.X 

Ensemble convert floating-point single from double exact 

E.DEFLATE.F.64.Z 

Ensemble convert floating-point single from double zero 

E.DEFLATE.F.128 

Ensemble convert floating-point double from quad 

E.DEFLATE.F.128.C 

Ensemble convert floating-point double from quad ceiling 

E.DEFLATE.F.128.F 

Ensemble convert floating-point double from quad floor 

E.DEFLATE.F.128.N 

Ensemble convert floating-point double from quad nearest 

E.DEFLATE.F.128.X 

Ensemble convert floating-point double from quad exact 

E.DEFLATE.F.128.Z 

Ensemble convert floating-point double from quad zero 

E.FLOAT.F.16 

Ensemble convert floating-point half from doublets 

E.FLOAT.F.16.C 

Ensemble convert floating-point half from doublets ceiling 

E.FLOAT.F.16.F 

Ensemble convert floating-point half from doublets floor 

E.FLOAT.F.16.N 

Ensemble convert floating-point half from doublets nearest 

E.FLOAT.F.16.X 

Ensemble convert floating-point half from doublets exact 

E.FLOAT.F.16.Z 

Ensemble convert floating-point half from doublets zero 


Fig.41A 


E.FLOAT.F.32 

Ensemble convert floating-point single from quadlets 

E.FLOAT.F.32.C 

Ensemble convert floating-point single from quadlets ceiling 

E.FLOAT.F.32.F 

Ensemble convert floating-point single from quadlets floor 

E.FLOAT.F.32.N 

Ensemble convert floating-point single from quadlets nearest 

E.FLOAT.F.32.X 

Ensemble convert floating-point single from quadlets exact 

E.FLOAT.F.32.Z 

Ensemble convert floating-point single from quadlets zero 

E.FLOAT.F.64 

Ensemble convert floating-point double from octlets 

E.FLOAT.F.64.C 

Ensemble convert floating-point double from octlets ceiling 

E.FLOAT.F.64.F 

Ensemble convert floating-point double from octlets floor 

E.FLOAT.F.64.N 

Ensemble convert floating-point double from octlets nearest 

E.FLOAT.F.64.X 

Ensemble convert floating-point double from octlets exact 

E.FLOAT.F.64.Z 

Ensemble convert floating-point double from octlets zero 

E.FLOAT.F.128 

Ensemble convert floating-point quad from hexlet 

E.FLOAT.F.128.C 

Ensemble convert floating-point quad from hexlet ceiling 

E.FLOAT.F.128.F 

Ensemble convert floating-point quad from hexlet floor 

E.FLOAT.F.128.N 

Ensemble convert floating-point quad from hexlet nearest 

E.FLOAT.F.128.X 

Ensemble convert floating-point quad from hexlet exact 

E.FLOAT.F.128.Z 

Ensemble convert floating-point quad from hexlet zero 

E.INFLATE.F.16 

Ensemble convert floating-point single from half 

E.INFLATE.F.16.X 

Ensemble convert floating-point single from half exception 

E.INFLATE.F.32 

Ensemble convert floating-point double from single 

E.INFLATE.F.32.X 

Ensemble convert floating-point double from single exception 

E.INFLATE.F.64 

Ensemble convert floating-point quad from double 

E.INFLATE.F.64.X 

Ensemble convert floating-point quad from double exception 

E.NEG.F.16 

Ensemble negate floating-point half 

E.NEG.F.16.X 

Ensemble negate floating-point half exception 

E.NEG.F.32 

Ensemble negate floating-point single 

E.NEG.F.32.X 

Ensemble negate floating-point single exception 

E.NEG.F.64 

Ensemble negate floating-point double 

E.NEG.F.64.X 

Ensemble negate floating-point double exception 

E.NEG.F.128 

Ensemble negate floating-point quad 

E.NEG.F.128.X 

Ensemble negate floating-point quad exception 

E.RECEST.F.16 

Ensemble reciprocal estimate floating-point half 

E.RECEST.F.16.X 

Ensemble reciprocal estimate floating-point half exception 

E.RECEST.F.32 

Ensemble reciprocal estimate floating-point single 

E.RECEST.F.32.X 

Ensemble reciprocal estimate floating-point single exception 

E.RECEST.F.64 

Ensemble reciprocal estimate floating-point double 

E.RECEST.F.64.X 

Ensemble reciprocal estimate floating-point double exception 

E.RECEST.F.128 

Ensemble reciprocal estimate floating-point quad 

E.RECEST.F.128.X 

Ensemble reciprocal estimate floating-point quad exception 


Fig. 41A (cont'd) 


E.RSQREST.F.16 

Ensemble floating-point reciprocal square root estimate half 

E.RSQREST.F.16.X 

Ensemble floating-point reciprocal square root estimate half exact 

E.RSQREST.F.32 

Ensemble floating-point reciprocal square root estimate single 

E.RSQREST.F.32.X 

Ensemble floating-point reciprocal square root estimate single exact 

E.RSQREST.F.64 

Ensemble floating-point reciprocal square root estimate double 

E.RSQREST.F.64.X 

Ensemble floating-point reciprocal square root estimate double exact 

E.RSQREST.F.128 

Ensemble floating-point reciprocal square root estimate quad 

E.RSQREST.F.128.X 

Ensemble floating-point reciprocal square root estimate quad exact 

E.SINK.F.16 

Ensemble convert floating-point doublets from half nearest default 

E.SINK.F.16.C 

Ensemble convert floating-point doublets from half ceiling 

E.SINK.F.16.C.D 

Ensemble convert floating-point doublets from half ceiling default 

E.SINK.F.16.F 

Ensemble convert floating-point doublets from half floor 

E.SINK.F.16.F.D 

Ensemble convert floating-point doublets from half floor default 

E.SINK.F.16.N 

Ensemble convert floating-point doublets from half nearest 

E.SINK.F.16.X 

Ensemble convert floating-point doublets from half exact 

E.SINK.F.16.Z 

Ensemble convert floating-point doublets from half zero 

E.SINK.F.16.Z.D 

Ensemble convert floating-point doublets from half zero default 

E.SINK.F.32 

Ensemble convert floating-point quadlets from single nearest default 

E.SINK.F.32.C 

Ensemble convert floating-point quadlets from single ceiling 

E.SINK.F.32.C.D 

Ensemble convert floating-point quadlets from single ceiling default 

E.SINK.F.32.F 

Ensemble convert floating-point quadlets from single floor 

E.SINK.F.32.F.D 

Ensemble convert floating-point quadlets from single floor default 

E.SINK.F.32.N 

Ensemble convert floating-point quadlets from single nearest 

E.SINK.F.32.X 

Ensemble convert floating-point quadlets from single exact 

E.SINK.F.32.Z 

Ensemble convert floating-point quadlets from single zero 

E.SINK.F.32.Z.D 

Ensemble convert floating-point quadlets from single zero default 

E.SINK.F.64 

Ensemble convert floating-point octlets from double nearest default 

E.SINK.F.64.C 

Ensemble convert floating-point octlets from double ceiling 

E.SINK.F.64.C.D 

Ensemble convert floating-point octlets from double ceiling default 

E.SINK.F.64.F 

Ensemble convert floating-point octlets from double floor 

E.SINK.F.64.F.D 

Ensemble convert floating-point octlets from double floor default 

E.SINK.F.64.N 

Ensemble convert floating-point octlets from double nearest 

E.SINK.F.64.X 

Ensemble convert floating-point octlets from double exact 

E.SINK.F.64.Z 

Ensemble convert floating-point octlets from double zero 

E.SINK.F.64.Z.D 

Ensemble convert floating-point octlets from double zero default 

E.SINK.F.128 

Ensemble convert floating-point hexlet from quad nearest default 

E.SINK.F.128.C 

Ensemble convert floating-point hexlet from quad ceiling 

E.SINK.F.128.C.D 

Ensemble convert floating-point hexlet from quad ceiling default 

E.SINK.F.128.F 

Ensemble convert floating-point hexlet from quad floor 

E.SINK.F.128.F.D 

Ensemble convert floating-point hexlet from quad floor default 


Fig. 41 A (cont'd) 


E.SINK.F.128.N 

Ensemble convert floating-point hexlet from quad nearest 

E.SINK.F.128.X 

Ensemble convert floating-point hexlet from quad exact 

E.SINK.F.128.Z 

Ensemble convert floating-point hexlet from quad zero 

E.SINK.F.128.Z.D 

Ensemble convert floating-point hexlet from quad zero default 

E.SQR.F.16 

Ensemble square root floating-point half 

E.SQR.F.16.C 

Ensemble square root floating-point half ceiling 

E.SQR.F.16.F 

Ensemble square root floating-point half floor 

E.SQR.F.16.N 

Ensemble square root floating-point half nearest 

E.SQR.F.16.X 

Ensemble square root floating-point half exact 

E.SQR.F.16.Z 

Ensemble square root floating-point half zero 

E.SQR.F.32 

Ensemble square root floating-point single 

E.SQR.F.32.C 

Ensemble square root floating-point single ceiling 

E.SQR.F.32.F 

Ensemble square root floating-point single floor 

E.SQR.F.32.N 

Ensemble square root floating-point single nearest 

E.SQR.F.32.X 

Ensemble square root floating-point single exact 

E.SQR.F.32.Z 

Ensemble square root floating-point single zero 

E.SQR.F.64 

Ensemble square root floating-point double 

E.SQR.F.64.C 

Ensemble square root floating-point double ceiling 

E.SQR.F.64.F 

Ensemble square root floating-point double floor 

E.SQR.F.64.N 

Ensemble square root floating-point double nearest 

E.SQR.F.64.X 

Ensemble square root floating-point double exact 

E.SQR.F.64.Z 

Ensemble square root floating-point double zero 

E.SQR.F.128 

Ensemble square root floating-point quad 

E.SQR.F.128.C 

Ensemble square root floating-point quad ceiling 

E.SQR.F.128.F 

Ensemble square root floating-point quad floor 

E.SQR.F.128.N 

Ensemble square root floating-point quad nearest 

E.SQR.F.128.X 

Ensemble square root floating-point quad exact 

E.SQR.F.128.Z 

Ensemble square root floating-point quad zero 

E.SUM.F.16 

Ensemble sum floating-point half 

E.SUM.F.16.C 

Ensemble sum floating-point half ceiling 

E.SUM.F.16.F 

Ensemble sum floating-point half floor 

E.SUM.F.16.N 

Ensemble sum floating-point half nearest 

E.SUM.F.16.X 

Ensemble sum floating-point half exact 

E.SUM.F.16.Z 

Ensemble sum floating-point half zero 

E.SUM.F.32 

Ensemble sum floating-point single 

E.SUM.F.32.C 

Ensemble sum floating-point single ceiling 

E.SUM.F.32.F 

Ensemble sum floating-point single floor 

E.SUM.F.32.N 

Ensemble sum floating-point single nearest 

E.SUM.F.32.X 

Ensemble sum floating-point single exact 

E.SUM.F.32.Z 

Ensemble sum floating-point single zero 


Fig. 41 A (cont'd) 


E.SUM.F.64 

Ensemble sum floating-point double 

E.SUM.F.64.C 

Ensemble sum floating-point double ceiling 

E.SUM.F.64.F 

Ensemble sum floating-point double floor 

E.SUM.F.64.N 

Ensemble sum floating-point double nearest 

E.SUM.F.64.X 

Ensemble sum floating-point double exact 

E.SUM.F.64.Z 

Ensemble sum floating-point double zero 

E.SUM.F.128 

Ensemble sum floating-point quad 

E.SUM.F.128.C 

Ensemble sum floating-point quad ceiling 

E.SUM.F.128.F 

Ensemble sum floating-point quad floor 

E.SUM.F.128.N 

Ensemble sum floating-point quad nearest 

E.SUM.F.128.X 

Ensemble sum floating-point quad exact 

E.SUM.F.128.Z 

Ensemble sum floating-point quad zero 


Selection 



| | op | prec | round/trap |
|---|---|---|---|
| copy | COPY | 16 32 64 128 | NONE X |
| absolute value | ABS | 16 32 64 128 | NONE X |
| float from integer | FLOAT | 16 32 64 128 | NONE C F N X Z |
| integer from float | SINK | 16 32 64 128 | NONE C F N X Z C.D F.D Z.D |
| increase format precision | INFLATE | 16 32 64 | NONE X |
| decrease format precision | DEFLATE | 32 64 128 | NONE C F N X Z |
| negate | NEG | 16 32 64 128 | NONE X |
| reciprocal estimate | RECEST | 16 32 64 128 | NONE X |
| reciprocal square root estimate | RSQREST | 16 32 64 128 | NONE X |
| square root | SQR | 16 32 64 128 | NONE C F N X Z |
| sum | SUM | 16 32 64 128 | NONE C F N X Z |


Fig. 41 A (cont'd) 


Format 

E.op.prec.round rd=rc 
rd=eopprecround(rc) 

31 24 23 18 17 12 11 6 5 0 

| E.prec | rd | rc | op | E.UNARY | 

8 6 6 6 6 


Fig. 41 B 


Definition 

def EnsembleUnaryFloatingPoint(op,prec,round,rd,rc) as 
c <- RegRead(rc, 128) 
case op of 

E.ABS.F, E.NEG.F, E.SQR.F: 
for i <- 0 to 128-prec by prec 
ci <- F(prec, c_{i+prec-1..i}) 
case op of 

E.ABS.F: 

ai.t <- ci.t 
ai.s <- 0 
ai.e <- ci.e 
ai.f <- ci.f 
E.COPY.F: 
ai <- ci 
E.NEG.F: 

ai.t <- ci.t 
ai.s <- -ci.s 
ai.e <- ci.e 
ai.f <- ci.f 
E.RECEST.F: 

ai <- frecest(ci) 
E.RSQREST.F: 

ai <- frsqrest(ci) 
E.SQR.F: 

ai <- fsqr(ci) 

endcase 

a_{i+prec-1..i} <- PackF(prec, ai, round) 
endfor 
E.SUM.F: 

p[0].t <- NULL 

for i <- 0 to 128-prec by prec 

p[i+prec] <- fadd(p[i], F(prec, c_{i+prec-1..i})) 
endfor 

a <- PackF(prec, p[128], round) 
E.SINK.F: 

for i <- 0 to 128-prec by prec 

ci <- F(prec, c_{i+prec-1..i}) 

a_{i+prec-1..i} <- fsinkr(prec, ci, round) 
endfor 
E.FLOAT.F: 

for i <- 0 to 128-prec by prec 

ci.t <- NORM 

ci.e <- 0 

ci.s <- c_{i+prec-1} 
ci.f <- ci.s ? 1+~c_{i+prec-2..i} : c_{i+prec-2..i} 
a_{i+prec-1..i} <- PackF(prec, ci, round) 
endfor 


Fig. 41 C 


E.INFLATE.F: 

for i <- 0 to 64-prec by prec 
ci <- F(prec, c_{i+prec-1..i}) 

a_{i+i+prec+prec-1..i+i} <- PackF(prec+prec, ci, round) 
endfor 
E.DEFLATE.F: 

for i <- 0 to 128-prec by prec 

ci <- F(prec, c_{i+prec-1..i}) 

a_{i/2+prec/2-1..i/2} <- PackF(prec/2, ci, round) 
endfor 
a_{127..64} <- 0 

endcase 

RegWrite(rd, 128, a) 
enddef 


Exceptions 

Floating-point arithmetic 


Fig. 41 C (cont'd) 


E.MUL.G.8 

Ensemble multiply Galois field byte 

E.MUL.G.64 

Ensemble multiply Galois field octlet 


Fig. 42A 


Format 

E.MUL.G.size ra=rd,rc,rb 
ra=emulgsize(rd,rc,rb) 

31 24 23 18 17 12 11 6 5 0 

| E.MUL.G.size | rd | rc | rb | ra | 

8 6 6 6 6 

Fig.42B 


Definition 


def c <- PolyMultiply(size,a,b) as 

p[0] <- 0^{2*size} 

for k <- 0 to size-1 

p[k+1] <- p[k] ^ a_k ? (0^{size-k} || b || 0^k) : 0^{2*size} 

endfor 

c <- p[size] 
enddef 

def c <- PolyResidue(size,a,b) as 
p[0] <- a 

for k <- size-1 to 0 by -1 

p[k+1] <- p[k] ^ p[0]_{size+k} ? (0^{size-k} || 1^1 || b || 0^k) : 0^{2*size} 
endfor 

c <- p[size]_{size-1..0} 
enddef 

def EnsembleTernary(op,size,rd,rc,rb,ra) as 
d <- RegRead(rd, 128) 
c <- RegRead(rc, 128) 
b <- RegRead(rb, 128) 
case op of 

E.MUL.G: 

for i <- 0 to 128-size by size 

a_{size-1+i..i} <- PolyResidue(size, PolyMul(size, c_{size-1+i..i}, b_{size-1+i..i}), d_{size-1+i..i}) 
endfor 

endcase 

RegWrite(ra, 128, a) 
enddef 


Exceptions 

none 


Fig. 42C 
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Ensemble multiply Galois field bytes 


Fig. 42D 


X.COMPRESS.2 

Crossbar compress signed pecks 

X.COMPRESS.4 

Crossbar compress signed nibbles 

X.COMPRESS.8 

Crossbar compress signed bytes 

X.COMPRESS.16 

Crossbar compress signed doublets 

X.COMPRESS.32 

Crossbar compress signed quadlets 

X.COMPRESS.64 

Crossbar compress signed octlets 

X.COMPRESS.128 

Crossbar compress signed hexlet 

X.COMPRESS.U.2 

Crossbar compress unsigned pecks 

X.COMPRESS.U.4 

Crossbar compress unsigned nibbles 

X.COMPRESS.U.8 

Crossbar compress unsigned bytes 

X.COMPRESS.U.16 

Crossbar compress unsigned doublets 

X.COMPRESS.U.32 

Crossbar compress unsigned quadlets 

X.COMPRESS.U.64 

Crossbar compress unsigned octlets 

X.COMPRESS.U.128 

Crossbar compress unsigned hexlet 

X.EXPAND.2 

Crossbar expand signed pecks 

X.EXPAND.4 

Crossbar expand signed nibbles 

X.EXPAND.8 

Crossbar expand signed bytes 

X.EXPAND.16 

Crossbar expand signed doublets 

X.EXPAND.32 

Crossbar expand signed quadlets 

X.EXPAND.64 

Crossbar expand signed octlets 

X.EXPAND.128 

Crossbar expand signed hexlet 

X.EXPAND.U.2 

Crossbar expand unsigned pecks 

X.EXPAND.U.4 

Crossbar expand unsigned nibbles 

X.EXPAND.U.8 

Crossbar expand unsigned bytes 

X.EXPAND.U.16 

Crossbar expand unsigned doublets 

X.EXPAND.U.32 

Crossbar expand unsigned quadlets 

X.EXPAND.U.64 

Crossbar expand unsigned octlets 

X.EXPAND.U.128 

Crossbar expand unsigned hexlet 

X.ROTL.2 

Crossbar rotate left pecks 

X.ROTL.4 

Crossbar rotate left nibbles 

X.ROTL.8 

Crossbar rotate left bytes 

X.ROTL.16 

Crossbar rotate left doublets 

X.ROTL.32 

Crossbar rotate left quadlets 

X.ROTL.64 

Crossbar rotate left octlets 

X.ROTL.128 

Crossbar rotate left hexlet 

X.ROTR.2 

Crossbar rotate right pecks 

X.ROTR.4 

Crossbar rotate right nibbles 

X.ROTR.8 

Crossbar rotate right bytes 

X.ROTR.16 

Crossbar rotate right doublets 


Fig. 43A 


X.ROTR.32 

Crossbar rotate right quadlets 

X.ROTR.64 

Crossbar rotate right octlets 

X.ROTR.128 

Crossbar rotate right hexlet 

X.SHL.2 

Crossbar shift left pecks 

X.SHL.2.O 

Crossbar shift left signed pecks check overflow 

X.SHL.4 

Crossbar shift left nibbles 

X.SHL.4.O 

Crossbar shift left signed nibbles check overflow 

X.SHL.8 

Crossbar shift left bytes 

X.SHL.8.O 

Crossbar shift left signed bytes check overflow 

X.SHL.16 

Crossbar shift left doublets 

X.SHL.16.O 

Crossbar shift left signed doublets check overflow 

X.SHL.32 

Crossbar shift left quadlets 

X.SHL.32.O 

Crossbar shift left signed quadlets check overflow 

X.SHL.64 

Crossbar shift left octlets 

X.SHL.64.O 

Crossbar shift left signed octlets check overflow 

X.SHL.128 

Crossbar shift left hexlet 

X.SHL.128.O 

Crossbar shift left signed hexlet check overflow 

X.SHL.U.2.O 

Crossbar shift left unsigned pecks check overflow 

X.SHL.U.4.O 

Crossbar shift left unsigned nibbles check overflow 

X.SHL.U.8.O 

Crossbar shift left unsigned bytes check overflow 

X.SHL.U.16.O 

Crossbar shift left unsigned doublets check overflow 

X.SHL.U.32.O 

Crossbar shift left unsigned quadlets check overflow 

X.SHL.U.64.O 

Crossbar shift left unsigned octlets check overflow 

X.SHL.U.128.O 

Crossbar shift left unsigned hexlet check overflow 

X.SHR.2 

Crossbar signed shift right pecks 

X.SHR.4 

Crossbar signed shift right nibbles 

X.SHR.8 

Crossbar signed shift right bytes 

X.SHR.16 

Crossbar signed shift right doublets 

X.SHR.32 

Crossbar signed shift right quadlets 

X.SHR.64 

Crossbar signed shift right octlets 

X.SHR.128 

Crossbar signed shift right hexlet 

X.SHR.U.2 

Crossbar shift right unsigned pecks 

X.SHR.U.4 

Crossbar shift right unsigned nibbles 

X.SHR.U.8 

Crossbar shift right unsigned bytes 

X.SHR.U.16 

Crossbar shift right unsigned doublets 

X.SHR.U.32 

Crossbar shift right unsigned quadlets 

X.SHR.U.64 

Crossbar shift right unsigned octlets 

X.SHR.U.128 

Crossbar shift right unsigned hexlet 


Fig. 43A (cont'd) 


Selection 


| class | op | size |
|---|---|---|
| precision | EXPAND EXPAND.U COMPRESS COMPRESS.U | 2 4 8 16 32 64 128 |
| shift | ROTR ROTL SHR SHL SHL.O SHL.U.O SHR.U | 2 4 8 16 32 64 128 |


Format 

X.op.size rd=rc,rb 
rd=xopsize(rc,rb) 

31 25 24 23 18 17 12 11 6 5 2 1 0 

| XSHIFT |s| rd | rc | rb | op |sz| 

7 1 6 6 6 4 2 

lsize <- log(size) 
s <- lsize_2 
sz <- lsize_{1..0} 


Fig. 43B 


Definition 

def Crossbar(op,size,rd,rc,rb) 
c <- RegRead(rc, 128) 
b <- RegRead(rb, 128) 
shift <- b and (size-1) 
case op_{5..2} || 0^2 of 
X.COMPRESS: 
hsize <- size/2 

for i <- 0 to 64-hsize by hsize 
if shift < hsize then 

a_{i+hsize-1..i} <- c_{i+i+shift+hsize-1..i+i+shift} 

else 

a_{i+hsize-1..i} <- c_{i+i+size-1}^{shift-hsize} || c_{i+i+size-1..i+i+shift} 

endif 
endfor 
a_{127..64} <- 0 
X.COMPRESS.U: 
hsize <- size/2 
for i <- 0 to 64-hsize by hsize 
if shift < hsize then 

a_{i+hsize-1..i} <- c_{i+i+shift+hsize-1..i+i+shift} 

else 

a_{i+hsize-1..i} <- 0^{shift-hsize} || c_{i+i+size-1..i+i+shift} 

endif 
endfor 
a_{127..64} <- 0 
X.EXPAND: 

hsize <- size/2 

for i <- 0 to 64-hsize by hsize 
if shift < hsize then 

a_{i+i+size-1..i+i} <- c_{i+hsize-1}^{hsize-shift} || c_{i+hsize-1..i} || 0^{shift} 

else 

a_{i+i+size-1..i+i} <- c_{i+size-shift-1..i} || 0^{shift} 

endif 
endfor 
X.EXPAND.U: 

hsize <- size/2 

for i <- 0 to 64-hsize by hsize 
if shift < hsize then 

a_{i+i+size-1..i+i} <- 0^{hsize-shift} || c_{i+hsize-1..i} || 0^{shift} 

else 

a_{i+i+size-1..i+i} <- c_{i+size-shift-1..i} || 0^{shift} 

endif 
endfor 
X.ROTL: 

for i <- 0 to 128-size by size 

a_{i+size-1..i} <- c_{i+size-1-shift..i} || c_{i+size-1..i+size-1-shift} 
endfor 


Fig. 43C 


X.ROTR: 

for i <- 0 to 128-size by size 

a_{i+size-1..i} <- c_{i+shift-1..i} || c_{i+size-1..i+shift} 
endfor 
X.SHL: 

for i <- 0 to 128-size by size 

a_{i+size-1..i} <- c_{i+size-1-shift..i} || 0^{shift} 
endfor 
X.SHL.O: 

for i <- 0 to 128-size by size 

if c_{i+size-1..i+size-1-shift} ≠ c_{i+size-1-shift}^{shift+1} then 
raise FixedPointArithmetic 

endif 

a_{i+size-1..i} <- c_{i+size-1-shift..i} || 0^{shift} 
endfor 
X.SHL.U.O: 

for i <- 0 to 128-size by size 

if c_{i+size-1..i+size-shift} ≠ 0^{shift} then 
raise FixedPointArithmetic 

endif 

a_{i+size-1..i} <- c_{i+size-1-shift..i} || 0^{shift} 
endfor 
X.SHR: 

for i <- 0 to 128-size by size 

a_{i+size-1..i} <- c_{i+size-1}^{shift} || c_{i+size-1..i+shift} 
endfor 
X.SHR.U: 

for i <- 0 to 128-size by size 

a_{i+size-1..i} <- 0^{shift} || c_{i+size-1..i+shift} 
endfor 

endcase 

RegWrite(rd, 128, a) 
enddef 


Exceptions 

Fixed-point arithmetic 


Fig. 43C (cont'd) 


1 1 1 1 

: 1 1 « 

ill 1 mm 1 

\" 1 

\ / 

1 1 

i m 

m\ 


Compress 32 bits to 16, with 4-bit right shift 


Fig. 43D 


Operation codes 


X.SHL.M.2 

Crossbar shift left merge pecks 

X.SHL.M.4 

Crossbar shift left merge nibbles 

X.SHL.M.8 

Crossbar shift left merge bytes 

X.SHL.M.16 

Crossbar shift left merge doublets 

X.SHL.M.32 

Crossbar shift left merge quadlets 

X.SHL.M.64 

Crossbar shift left merge octlets 

X.SHL.M.128 

Crossbar shift left merge hexlet 

X.SHR.M.2 

Crossbar shift right merge pecks 

X.SHR.M.4 

Crossbar shift right merge nibbles 

X.SHR.M.8 

Crossbar shift right merge bytes 

X.SHR.M.16 

Crossbar shift right merge doublets 

X.SHR.M.32 

Crossbar shift right merge quadlets 

X.SHR.M.64 

Crossbar shift right merge octlets 

X.SHR.M.128 

Crossbar shift right merge hexlet 


Fig. 43E 


Format 


X.op.size rd@rc,rb 
rd=xopsize(rd,rc,rb) 

31 25 24 23 18 17 12 11 6 5 2 1 0 

| XSHIFT |s| rd | rc | rb | op |sz| 


lsize <- log(size) 
s <- lsize_2 
sz <- lsize_{1..0} 

Fig 43F 


Definition 

def CrossbarInplace(op,size,rd,rc,rb) as 
d <- RegRead(rd, 128) 
c <- RegRead(rc, 128) 
b <- RegRead(rb, 128) 
shift <- b and (size-1) 
for i <- 0 to 128-size by size 
case op of 

X.SHR.M: 

a_{i+size-1..i} <- c_{i+shift-1..i} || d_{i+size-1..i+shift} 
X.SHL.M: 

a_{i+size-1..i} <- d_{i+size-1-shift..i} || c_{i+shift-1..i} 

endfor 

RegWrite(rd, 128, a) 
enddef 

Exceptions 

none 


Fig 43G 


Operation codes 


X.COMPRESS.I.2 

Crossbar compress immediate signed pecks 

X.COMPRESS.I.4 

Crossbar compress immediate signed nibbles 

X.COMPRESS.I.8 

Crossbar compress immediate signed bytes 

X.COMPRESS.I.16 

Crossbar compress immediate signed doublets 

X.COMPRESS.I.32 

Crossbar compress immediate signed quadlets 

X.COMPRESS.I.64 

Crossbar compress immediate signed octlets 

X.COMPRESS.I.128 

Crossbar compress immediate signed hexlet 

X.COMPRESS.I.U.2 

Crossbar compress immediate unsigned pecks 

X.COMPRESS.I.U.4 

Crossbar compress immediate unsigned nibbles 

X.COMPRESS.I.U.8 

Crossbar compress immediate unsigned bytes 

X.COMPRESS.I.U.16 

Crossbar compress immediate unsigned doublets 

X.COMPRESS.I.U.32 

Crossbar compress immediate unsigned quadlets 

X.COMPRESS.I.U.64 

Crossbar compress immediate unsigned octlets 

X.COMPRESS.I.U.128 

Crossbar compress immediate unsigned hexlet 

X.EXPAND.I.2 

Crossbar expand immediate signed pecks 

X.EXPAND.I.4 

Crossbar expand immediate signed nibbles 

X.EXPAND.I.8 

Crossbar expand immediate signed bytes 

X.EXPAND.I.16 

Crossbar expand immediate signed doublets 

X.EXPAND.I.32 

Crossbar expand immediate signed quadlets 

X.EXPAND.I.64 

Crossbar expand immediate signed octlets 

X.EXPAND.I.128 

Crossbar expand immediate signed hexlet 

X.EXPAND.I.U.2 

Crossbar expand immediate unsigned pecks 

X.EXPAND.I.U.4 

Crossbar expand immediate unsigned nibbles 

X.EXPAND.I.U.8 

Crossbar expand immediate unsigned bytes 

X.EXPAND.I.U.16 

Crossbar expand immediate unsigned doublets 

X.EXPAND.I.U.32 

Crossbar expand immediate unsigned quadlets 

X.EXPAND.I.U.64 

Crossbar expand immediate unsigned octlets 

X.EXPAND.I.U.128 

Crossbar expand immediate unsigned hexlet 

X.ROTL.I.2 

Crossbar rotate left immediate pecks 

X.ROTL.I.4 

Crossbar rotate left immediate nibbles 

X.ROTL.I.8 

Crossbar rotate left immediate bytes 

X.ROTL.I.16 

Crossbar rotate left immediate doublets 

X.ROTL.I.32 

Crossbar rotate left immediate quadlets 

X.ROTL.I.64 

Crossbar rotate left immediate octlets 

X.ROTL.I.128 

Crossbar rotate left immediate hexlet 

X.ROTR.I.2 

Crossbar rotate right immediate pecks 

X.ROTR.I.4 

Crossbar rotate right immediate nibbles 

X.ROTR.I.8 

Crossbar rotate right immediate bytes 

X.ROTR.I.16 

Crossbar rotate right immediate doublets 

X.ROTR.I.32 

Crossbar rotate right immediate quadlets 

X.ROTR.I.64 

Crossbar rotate right immediate octlets 

X.ROTR.I.128 

Crossbar rotate right immediate hexlet 


Fig. 43H 


X.SHL.I.2 

Crossbar shift left immediate pecks 

X.SHL.I.2.O 

Crossbar shift left immediate signed pecks check overflow 

X.SHL.I.4 

Crossbar shift left immediate nibbles 

X.SHL.I.4.O 

Crossbar shift left immediate signed nibbles check overflow 

X.SHL.I.8 

Crossbar shift left immediate bytes 

X.SHL.I.8.O 

Crossbar shift left immediate signed bytes check overflow 

X.SHL.I.16 

Crossbar shift left immediate doublets 

X.SHL.I.16.O 

Crossbar shift left immediate signed doublets check overflow 

X.SHL.I.32 

Crossbar shift left immediate quadlets 

X.SHL.I.32.O 

Crossbar shift left immediate signed quadlets check overflow 

X.SHL.I.64 

Crossbar shift left immediate octlets 

X.SHL.I.64.O 

Crossbar shift left immediate signed octlets check overflow 

X.SHL.I.128 

Crossbar shift left immediate hexlet 

X.SHL.I.128.O 

Crossbar shift left immediate signed hexlet check overflow 

X.SHL.I.U.2.O 

Crossbar shift left immediate unsigned pecks check overflow 

X.SHL.I.U.4.O 

Crossbar shift left immediate unsigned nibbles check overflow 

X.SHL.I.U.8.O 

Crossbar shift left immediate unsigned bytes check overflow 

X.SHL.I.U.16.O 

Crossbar shift left immediate unsigned doublets check overflow 

X.SHL.I.U.32.O 

Crossbar shift left immediate unsigned quadlets check overflow 

X.SHL.I.U.64.O 

Crossbar shift left immediate unsigned octlets check overflow 

X.SHL.I.U.128.O 

Crossbar shift left immediate unsigned hexlet check overflow 

X.SHR.I.2 

Crossbar signed shift right immediate pecks 

X.SHR.I.4 

Crossbar signed shift right immediate nibbles 

X.SHR.I.8 

Crossbar signed shift right immediate bytes 

X.SHR.I.16 

Crossbar signed shift right immediate doublets 

X.SHR.I.32 

Crossbar signed shift right immediate quadlets 

X.SHR.I.64 

Crossbar signed shift right immediate octlets 

X.SHR.I.128 

Crossbar signed shift right immediate hexlet 

X.SHR.I.U.2 

Crossbar shift right immediate unsigned pecks 

X.SHR.I.U.4 

Crossbar shift right immediate unsigned nibbles 

X.SHR.I.U.8 

Crossbar shift right immediate unsigned bytes 

X.SHR.I.U.16 

Crossbar shift right immediate unsigned doublets 

X.SHR.I.U.32 

Crossbar shift right immediate unsigned quadlets 

X.SHR.I.U.64 

Crossbar shift right immediate unsigned octlets 

X.SHR.I.U.128 

Crossbar shift right immediate unsigned hexlet 


Fig. 43H (cont) 


Selection 


| class | op | size |
|---|---|---|
| precision | COMPRESS.I COMPRESS.I.U EXPAND.I EXPAND.I.U | 2 4 8 16 32 64 128 |
| shift | ROTL.I ROTR.I SHL.I SHL.I.O SHL.I.U.O SHR.I SHR.I.U | 2 4 8 16 32 64 128 |
| copy | COPY | |



Format 


X.op.size rd=rc,shift 
rd=xopsize(rc,shift) 

31 24 23 18 17 12 11 6 5 0 

| XSHIFTI | rd | rc | simm | op | 

8 6 6 6 6 

t <- 256-2*size+shift 

op_{1..0} <- t_{7..6} 

simm <- t_{5..0} 


Fig. 431 


Definition 

def CrossbarShortImmediate(op,rd,rc,simm) 
case (op_{1..0} || simm) of 
0..127: 

size <- 128 
128..191: 

size <- 64 
192..223: 

size <- 32 
224..239: 

size <- 16 
240..247: 

size <- 8 
248..251: 

size <- 4 
252..253: 

size <- 2 
254..255: 

raise ReservedInstruction 

endcase 

shift <- (op_0 || simm) and (size-1) 
c <- RegRead(rc, 128) 
case (op_{5..2} || 0^2) of 
X.COMPRESS.I: 
hsize <- size/2 

for i <- 0 to 64-hsize by hsize 
if shift < hsize then 

a_{i+hsize-1..i} <- c_{i+i+shift+hsize-1..i+i+shift} 

else 

a_{i+hsize-1..i} <- c_{i+i+size-1}^{shift-hsize} || c_{i+i+size-1..i+i+shift} 

endif 
endfor 
a_{127..64} <- 0 
X.COMPRESS.I.U: 
hsize <- size/2 

for i <- 0 to 64-hsize by hsize 
if shift < hsize then 

a_{i+hsize-1..i} <- c_{i+i+shift+hsize-1..i+i+shift} 

else 

a_{i+hsize-1..i} <- 0^{shift-hsize} || c_{i+i+size-1..i+i+shift} 

endif 
endfor 
a_{127..64} <- 0 


Fig. 43J 


X.EXPAND.I: 

hsize <- size/2 

for i <- 0 to 64-hsize by hsize 
if shift < hsize then 

a_{i+i+size-1..i+i} <- c_{i+hsize-1}^{hsize-shift} || c_{i+hsize-1..i} || 0^{shift} 

else 

a_{i+i+size-1..i+i} <- c_{i+size-shift-1..i} || 0^{shift} 

endif 
endfor 
X.EXPAND.I.U: 
hsize <- size/2 

for i <- 0 to 64-hsize by hsize 
if shift < hsize then 

a_{i+i+size-1..i+i} <- 0^{hsize-shift} || c_{i+hsize-1..i} || 0^{shift} 

else 

a_{i+i+size-1..i+i} <- c_{i+size-shift-1..i} || 0^{shift} 

endif 
endfor 
X.SHL.I: 

for i <- 0 to 128-size by size 

a_{i+size-1..i} <- c_{i+size-1-shift..i} || 0^{shift} 
endfor 
X.SHL.I.O: 

for i <- 0 to 128-size by size 

if c_{i+size-1..i+size-1-shift} ≠ c_{i+size-1-shift}^{shift+1} then 
raise FixedPointArithmetic 

endif 

a_{i+size-1..i} <- c_{i+size-1-shift..i} || 0^{shift} 
endfor 
X.SHL.I.U.O: 

for i <- 0 to 128-size by size 

if c_{i+size-1..i+size-shift} ≠ 0^{shift} then 
raise FixedPointArithmetic 

endif 

a_{i+size-1..i} <- c_{i+size-1-shift..i} || 0^{shift} 
endfor 

Fig. 43J (cont) 


X.ROTR.I: 

for i <- 0 to 128-size by size 

a_{i+size-1..i} <- c_{i+shift-1..i} || c_{i+size-1..i+shift} 

endfor 
X.SHR.I: 

for i <- 0 to 128-size by size 

a_{i+size-1..i} <- c_{i+size-1}^{shift} || c_{i+size-1..i+shift} 
endfor 
X.SHR.I.U: 

for i <- 0 to 128-size by size 

a_{i+size-1..i} <- 0^{shift} || c_{i+size-1..i+shift} 
endfor 

endcase 

RegWrite(rd, 128, a) 
enddef 


Exceptions 

Fixed-point arithmetic 
Reserved Instruction 

Fig. 43J (cont) 


Operation codes 


X.SHL.M.I.2 

Crossbar shift left merge immediate pecks 

X.SHL.M.I.4 

Crossbar shift left merge immediate nibbles 

X.SHL.M.I.8 

Crossbar shift left merge immediate bytes 

X.SHL.M.I.16 

Crossbar shift left merge immediate doublets 

X.SHL.M.I.32 

Crossbar shift left merge immediate quadlets 

X.SHL.M.I.64 

Crossbar shift left merge immediate octlets 

X.SHL.M.I.128 

Crossbar shift left merge immediate hexlet 

X.SHR.M.I.2 

Crossbar shift right merge immediate pecks 

X.SHR.M.I.4 

Crossbar shift right merge immediate nibbles 

X.SHR.M.I.8 

Crossbar shift right merge immediate bytes 

X.SHR.M.I.16 

Crossbar shift right merge immediate doublets 

X.SHR.M.I.32 

Crossbar shift right merge immediate quadlets 

X.SHR.M.I.64 

Crossbar shift right merge immediate octlets 

X.SHR.M.I.128 

Crossbar shift right merge immediate hexlet 


Fig 43K 


Format 


X.op.size rd@rc,shift 
rd=xopsize(rc,shift) 

31 24 23 18 17 12 11 6 5 0 

| XSHIFTI | rd | rc | simm | op | 

8 6 6 6 6 

t <- 256-2*size+shift 
op_{1..0} <- t_{7..6} 
simm <- t_{5..0} 

Fig 43L 


\ 


Definition 

def CrossbarShortImmediateInplace(op,rd,rc,simm)
case (op_{1..0} || simm) of
0..127:

size <- 128
128.. 191: 

size <- 64 
192..223: 

size <- 32 
224..239: 

size <- 16 
240..247: 

size <- 8 
248..251: 

size <- 4 
252..253:

size <- 2
254..255:

raise ReservedInstruction

endcase 

shift <- (op_{0} || simm) and (size-1)
c <- RegRead(rc, 128)
d <- RegRead(rd, 128)
for i <- 0 to 128-size by size
case (op_{5..2} || 0^{2}) of
X.SHR.M.I:

a_{i+size-1..i} <- c_{i+shift-1..i} || d_{i+size-1..i+shift}
X.SHL.M.I:

a_{i+size-1..i} <- d_{i+size-1-shift..i} || c_{i+shift-1..i}

endcase 
endfor 

RegWrite(rd, 128, a) 
enddef 

Exceptions 

Reserved Instruction 

Fig 43M 


Format 


X.EXTRACT ra=rd,rc,rb
ra=xextract(rd,rc,rb)

31 24 23 18 17 12 11 6 5 0

| op | rd | rc | rb | ra |


Fig. 44A 


Definition 


def CrossbarExtract(op,ra,rb,rc,rd) as 
d <- RegRead(rd, 128) 
c <- RegRead(rc, 128) 
b <- RegRead(rb, 128) 
case b_{8..0} of
0..255:

gsize <- 128
256..383:

gsize <- 64
384..447: 

gsize <- 32 
448..479: 

gsize <- 16 
480..495: 

gsize <- 8 
496..503: 

gsize <- 4
504..507: 

gsize <- 2 
508..511: 

gsize <- 1 

endcase 
m <- b_{12}

as <- signed <- b_{14}
h <- (2-m)*gsize

spos <- (b_{8..0}) and ((2-m)*gsize-1)
dpos <- (0 || b_{23..16}) and (gsize-1)
sfsize <- (0 || b_{31..24}) and (gsize-1)

tfsize <- (sfsize = 0) or ((sfsize+dpos) > gsize) ? gsize-dpos : sfsize 
fsize <- (tfsize + spos > h) ? h - spos : tfsize 
for i <- 0 to 128-gsize by gsize
case op of

X.EXTRACT:
if m then

p <- d_{gsize+i-1..i}

else

p <- (d || c)_{2*(gsize+i)-1..2*i}

endif

endcase

v <- (as & p_{h-1}) || p

w <- (as & v_{spos+fsize-1})^{gsize-fsize-dpos} || v_{fsize-1+spos..spos} || 0^{dpos}
if m then

a_{size-1+i..i} <- c_{gsize-1+i..dpos+fsize+i} || w_{dpos+fsize-1..dpos} || c_{dpos-1+i..i}

else

a_{size-1+i..i} <- w

endif 

endfor 

RegWrite(ra, 128, a) 
enddef 


Exceptions 

none 


Fig. 44B 


< — fsizo — x opos 



< — fs i z e — >< c l pos 


Crossbar extract 
Fig. 44C 


< — fsizo — x spos 



< — fstze — > < dpos 


Crossbar merge extract 


Fig. 44D 


Operation codes 


E.MUL.X 

Ensemble multiply extract 

E.EXTRACT

Ensemble extract 

E.SCAL.ADD.X 

Ensemble scale add extract 


Fig. 44E 


Format 


E.op ra=rd,rc,rb 


ra=eop(rd,rc,rb) 

31 24 23 18 17 12 11 6 5 0

| op | rd | rc | rb | ra |


Fig. 44F 


def mul(size,h,vs,v,i,ws,w,j) as

mul <- ((vs&v_{size-1+i})^{h-size} || v_{size-1+i..i}) * ((ws&w_{size-1+j})^{h-size} || w_{size-1+j..j})
enddef 

def EnsembleExtract(op,ra,rb,rc,rd) as 
d <- RegRead(rd, 128) 
c <- RegRead(rc, 128) 
b <- RegRead(rb, 128) 
case b_{8..0} of
0..255:

sgsize <- 128
256..383: 

sgsize <- 64 
384.. 447: 

sgsize <- 32 
448..479: 

sgsize <- 16 
480..495: 

sgsize <- 8 
496..503: 

sgsize <- 4 
504..507: 

sgsize <- 2
508..511:

sgsize <- 1

endcase
l <- b_{11}
m <- b_{12}
n <- b_{13}
signed <- b_{14}
case op of 

E.EXTRACT:

gsize <- sgsize

h <- (2-m)*gsize

as <- signed

spos <- (b_{8..0}) and ((2-m)*gsize-1)
E.SCAL.ADD.X:

if (sgsize < 8) then

gsize <- 8
elseif (sgsize*(n+1) > 32) then

gsize <- 32/(n+1)

else

gsize <- sgsize

endif

ds <- cs <- signed

bs <- signed ^ m

as <- signed or m or n

h <- (2*gsize) + 1 + n

spos <- (b_{8..0}) and (2*gsize-1)

Fig. 44G 


E.MUL.X:

if (sgsize < 8) then

gsize <- 8
elseif (sgsize*(n+1) > 128) then

gsize <- 128/(n+1)

else

gsize <- sgsize

endif

ds <- signed

cs <- signed ^ m

as <- signed or m or n

h <- (2*gsize) + n

spos <- (b_{8..0}) and (2*gsize-1)

endcase 

dpos <- (0 || b_{23..16}) and (gsize-1)
r <- spos

sfsize <- (0 || b_{31..24}) and (gsize-1)

tfsize <- (sfsize = 0) or ((sfsize+dpos) > gsize) ? gsize-dpos : sfsize
fsize <- (tfsize + spos > h) ? h - spos : tfsize
if (b_{10..9} = Z) and not as then
rnd <- F

else

rnd <- b_{10..9}

endif

for i <- 0 to 128-gsize by gsize
case op of 

E.EXTRACT:
if m then

p <- d_{gsize+i-1..i}

else

p <- (d || c)_{2*(gsize+i)-1..2*i}

endif
E.MUL.X:
if n then

if (i and gsize) = 0 then

p <- mul(gsize,h,ds,d,i,cs,c,i) - mul(gsize,h,ds,d,i+size,cs,c,i+size)

else

p <- mul(gsize,h,ds,d,i,cs,c,i+size) + mul(gsize,h,ds,d,i,cs,c,i+size)

endif

else

p <- mul(gsize,h,ds,d,i,cs,c,i)

endif


Fig. 44G (cont) 


E.SCAL.ADD.X:
if n then

if (i and gsize) = 0 then

p <- mul(gsize,h,ds,d,i,bs,b,64+2*gsize)
+ mul(gsize,h,cs,c,i,bs,b,64)

- mul(gsize,h,ds,d,i+gsize,bs,b,64+3*gsize)

- mul(gsize,h,cs,c,i+gsize,bs,b,64+gsize)

else

p <- mul(gsize,h,ds,d,i,bs,b,64+3*gsize)
+ mul(gsize,h,cs,c,i,bs,b,64+gsize)
+ mul(gsize,h,ds,d,i+gsize,bs,b,64+2*gsize)
+ mul(gsize,h,cs,c,i+gsize,bs,b,64)

endif

else

p <- mul(gsize,h,ds,d,i,bs,b,64+gsize) + mul(gsize,h,cs,c,i,bs,b,64)

endif

endcase 
case rnd of
N:

s <- 0^{h-r} || ~p_{r} || p_{r}^{r-1}

Z:

s <- 0^{h-r} || p_{h-1}^{r}

F:

s <- 0^{h}

C:

s <- 0^{h-r} || 1^{r}

endcase 

v <- ((as & p_{h-1}) || p) + (0 || s)

if (v_{h..r+fsize} = (as & v_{r+fsize-1})^{h+1-r-fsize}) or not (l and (op = E.EXTRACT)) then
w <- (as & v_{r+fsize-1})^{gsize-fsize-dpos} || v_{fsize-1+r..r} || 0^{dpos}

else

w <- (s ? (v_{h} || ~v_{h}^{gsize-dpos-1}) : 1^{gsize-dpos}) || 0^{dpos}

endif

if m and (op = E.EXTRACT) then

a_{size-1+i..i} <- c_{gsize-1+i..dpos+fsize+i} || w_{dpos+fsize-1..dpos} || c_{dpos-1+i..i}

else

a_{size-1+i..i} <- w

endif 
endfor 

RegWrite(ra, 128, a) 
enddef 

Exceptions 


none 


Fig. 44G (cont) 


X.DEPOSIT.2

Crossbar deposit signed pecks

X.DEPOSIT.4

Crossbar deposit signed nibbles

X.DEPOSIT.8

Crossbar deposit signed bytes

X.DEPOSIT.16

Crossbar deposit signed doublets 

X.DEPOSIT.32 

Crossbar deposit signed quadlets 

X.DEPOSIT.64 

Crossbar deposit signed octlets 

X.DEPOSIT.128 

Crossbar deposit signed hexlet 

X.DEPOSIT.U.2

Crossbar deposit unsigned pecks

X.DEPOSIT.U.4

Crossbar deposit unsigned nibbles

X.DEPOSIT.U.8

Crossbar deposit unsigned bytes

X.DEPOSIT.U.16

Crossbar deposit unsigned doublets

X.DEPOSIT.U.32 

Crossbar deposit unsigned quadlets 

X.DEPOSIT.U.64 

Crossbar deposit unsigned octlets 

X.DEPOSIT.U.128 

Crossbar deposit unsigned hexlet 

X.WITHDRAW.U.2 

Crossbar withdraw unsigned pecks

X.WITHDRAW.U.4 

Crossbar withdraw unsigned nibbles 

X.WITHDRAW.U.8 

Crossbar withdraw unsigned bytes 

X.WITHDRAW.U.16 

Crossbar withdraw unsigned doublets 

X.WITHDRAW.U.32 

Crossbar withdraw unsigned quadlets 

X.WITHDRAW.U.64 

Crossbar withdraw unsigned octlets 

X.WITHDRAW.U.128 

Crossbar withdraw unsigned hexlet 

X.WITHDRAW.2 

Crossbar withdraw pecks 

X.WITHDRAW.4 

Crossbar withdraw nibbles 

X.WITHDRAW.8 

Crossbar withdraw bytes 

X.WITHDRAW.16 

Crossbar withdraw doublets 

X.WITHDRAW.32 

Crossbar withdraw quadlets 

X.WITHDRAW.64 

Crossbar withdraw octlets 

X.WITHDRAW.128 

Crossbar withdraw hexlet 


Fig. 45A 


Equivalencies 


X.SEX.I.2

Crossbar extend immediate signed pecks

X.SEX.I.4

Crossbar extend immediate signed nibbles

X.SEX.I.8

Crossbar extend immediate signed bytes

X.SEX.I.16

Crossbar extend immediate signed doublets

X.SEX.I.32

Crossbar extend immediate signed quadlets

X.SEX.I.64

Crossbar extend immediate signed octlets

X.SEX.I.128

Crossbar extend immediate signed hexlet

X.ZEX.I.2

Crossbar extend immediate unsigned pecks

X.ZEX.I.4

Crossbar extend immediate unsigned nibbles

X.ZEX.I.8

Crossbar extend immediate unsigned bytes

X.ZEX.I.16

Crossbar extend immediate unsigned doublets

X.ZEX.I.32

Crossbar extend immediate unsigned quadlets

X.ZEX.I.64

Crossbar extend immediate unsigned octlets

X.ZEX.I.128

Crossbar extend immediate unsigned hexlet


X.SHL.I.gsize rd=rc,i

->

X.DEPOSIT.gsize rd=rc,size-i,i

X.SHR.I.gsize rd=rc,i

->

X.WITHDRAW.gsize rd=rc,size-i,i

X.SHRU.I.gsize rd=rc,i

->

X.WITHDRAW.U.gsize rd=rc,size-i,i

X.SEX.I.gsize rd=rc,i

->

X.DEPOSIT.gsize rd=rc,i,0

X.ZEX.I.gsize rd=rc,i

->

X.DEPOSIT.U.gsize rd=rc,i,0

Redundancies 

X.DEPOSIT.gsize rd=rc,gsize,0 

<=> 

X.COPY rd=rc 

X.DEPOSIT.U.gsize rd=rc,gsize,0 

<=> 

X.COPY rd=rc 

X.WITHDRAW.gsize rd=rc,gsize,0 

<=> 

X.COPY rd=rc 

X.WITHDRAW.U.gsize rd=rc,gsize,0 

<=> 

X.COPY rd=rc 


Fig. 45A (cont'd) 


Format 


X.op.gsize rd=rc,isize,ishift 
rd=xopgsize(rc,isize,ishift) 

31 26 25 24 23 18 17 12 11 6 5 0

| op | ih | rd | rc | gsfp | gsfs |

6 2 6 6 6 6

assert isize+ishift ≤ gsize
assert isize≥1

ih_{0} || gsfs <- 128-gsize+isize-1
ih_{1} || gsfp <- 128-gsize+ishift

Fig. 45B 


Definition 

def CrossbarField(op,rd,rc,gsfp,gsfs) as
c <- RegRead(rc, 128)
case ((op_{1} || gsfp) and (op_{0} || gsfs)) of
0..63:

gsize <- 128
64..95:

gsize <- 64
96..111:

gsize <- 32
112..119:

gsize <- 16
120..123:

gsize <- 8
124..125:

gsize <- 4

126:

gsize <- 2

127:

raise ReservedInstruction

endcase

ishift <- (op_{1} || gsfp) and (gsize-1)
isize <- ((op_{0} || gsfs) and (gsize-1))+1
if (ishift+isize>gsize)

raise ReservedInstruction

endif

case op of

X.DEPOSIT:

for i <- 0 to 128-gsize by gsize

a_{i+gsize-1..i} <- c_{i+isize-1}^{gsize-isize-ishift} || c_{i+isize-1..i} || 0^{ishift}
endfor
X.DEPOSIT.U:

for i <- 0 to 128-gsize by gsize

a_{i+gsize-1..i} <- 0^{gsize-isize-ishift} || c_{i+isize-1..i} || 0^{ishift}
endfor
X.WITHDRAW:

for i <- 0 to 128-gsize by gsize

a_{i+gsize-1..i} <- c_{i+isize+ishift-1}^{gsize-isize} || c_{i+isize+ishift-1..i+ishift}
endfor
X.WITHDRAW.U:

for i <- 0 to 128-gsize by gsize

a_{i+gsize-1..i} <- 0^{gsize-isize} || c_{i+isize+ishift-1..i+ishift}
endfor

endcase 

RegWrite(rd, 128, a) 
enddef 

Exceptions 

Reserved instruction 

Fig. 45C 



ih 0 ||gsfs 127 


encoding for crossbar field 
Fig. 45D 


< — fsizo — ► 



< — f 8i z e — >< dpoa 


crossbar deposit 
Fig. 45E


< — fs i z e — ► 

crossbar withdraw 


Fig. 45F 


Operation codes


X.DEPOSIT.M.2

Crossbar deposit merge pecks

X.DEPOSIT.M.4

Crossbar deposit merge nibbles

X.DEPOSIT.M.8

Crossbar deposit merge bytes

X.DEPOSIT.M.16

Crossbar deposit merge doublets 

X.DEPOSIT.M.32 

Crossbar deposit merge quadlets 

X.DEPOSIT.M.64 

Crossbar deposit merge octlets 

X.DEPOSIT.M.128 

Crossbar deposit merge hexlet 


Fig 45G 


Format 


X.op.gsize rd@rc,isize,ishift 
rd=xopgsize(rd,rc,isize,ishift) 

31 26 25 24 23 18 17 12 11 6 5 0

| op | ih | rd | rc | gsfp | gsfs |

6 2 6 6 6 6

assert isize+ishift ≤ gsize
assert isize≥1

ih_{0} || gsfs <- 128-gsize+isize-1
ih_{1} || gsfp <- 128-gsize+ishift


Fig 45H 


Definition 

def CrossbarFieldInplace(op,rd,rc,gsfp,gsfs) as
c <- RegRead(rc, 128)
d <- RegRead(rd, 128)
case ((op_{1} || gsfp) and (op_{0} || gsfs)) of
0..63:

gsize <- 128
64..95:

gsize <- 64
96..111:

gsize <- 32
112..119:

gsize <- 16
120..123:

gsize <- 8
124..125:

gsize <- 4

126:

gsize <- 2

127:

raise ReservedInstruction

endcase

ishift <- (op_{1} || gsfp) and (gsize-1)
isize <- ((op_{0} || gsfs) and (gsize-1))+1
if (ishift+isize>gsize)

raise ReservedInstruction

endif

for i <- 0 to 128-gsize by gsize

a_{i+gsize-1..i} <- d_{i+gsize-1..i+isize+ishift} || c_{i+isize-1..i} || d_{i+ishift-1..i}
endfor

RegWrite(rd, 128, a) 
enddef 

Exceptions 

Reserved instruction 

Fig 45I


< — fsiz e — >< dpoa 
crossbar deposit merge 

Fig 45J 


X.SHUFFLE.4 

Crossbar shuffle within pecks 

X.SHUFFLE.8 

Crossbar shuffle within bytes 

X.SHUFFLE.16 

Crossbar shuffle within doublets 

X.SHUFFLE.32 

Crossbar shuffle within quadlets 

X.SHUFFLE.64 

Crossbar shuffle within octlets 

X.SHUFFLE.128 

Crossbar shuffle within hexlet 

X.SHUFFLE.256 

Crossbar shuffle within triclet


Fig. 46A 


Format 


X.SHUFFLE.256 rd=rc,rb,v,w,h
X.SHUFFLE.size rd=rcb,v,w

rd=xshuffle256(rc,rb,v,w,h)
rd=xshufflesize(rcb,v,w)

31 24 23 18 17 12 11 6 5 0

| X.SHUFFLE | rd | rc | rb | op |

8 6 6 6 6 

rc <- rb <- rcb 
x<-log2(size) 
y<-log2(v) 
z<-log2(w) 

op <- ((x*x*x-3*x*x-4*x)/6-(z*z-z)/2+x*z+y) + (size=256)*(h*32-56) 


Fig. 46B 


Definition 


def CrossbarShuffle(major,rd,rc,rb,op)
c <- RegRead(rc, 128)
b <- RegRead(rb, 128)
if rc=rb then
case op of
0..55:

for x <- 2 to 7; for y <- 0 to x-2; for z <- 1 to x-y-1

if op = ((x*x*x-3*x*x-4*x)/6-(z*z-z)/2+x*z+y) then
for i <- 0 to 127

a_{i} <- c_{(i_{6..x} || i_{y+z-1..y} || i_{x-1..y+z} || i_{y-1..0})}

end

endif

endfor; endfor; endfor
56..63:

raise ReservedInstruction

endcase

elseif

case op_{4..0} of
0..27:

cb <- c || b
x <- 8
h <- op_{5}

for y <- 0 to x-2; for z <- 1 to x-y-1

if op_{4..0} = ((17*z-z*z)/2-8+y) then
for i <- h*128 to 127+h*128

a_{i-h*128} <- cb_{(i_{y+z-1..y} || i_{x-1..y+z} || i_{y-1..0})}

end

endif
endfor; endfor
28..31:

raise ReservedInstruction

endcase

endif

RegWrite(rd, 128, a) 
enddef 


Exceptions 

Reserved Instruction 


Fig. 46C 


127 rcb(128) o 



127 


rd(128) 


4-way shuffle bytes within hexlet 


Fig. 46D 


255 rc(128) 128 127 rb(128) 0




f 

■RWi^talliitai^ l®m\ 
127 rd(128) o 


4-way shuffle bytes within triclet 
Fig. 46E 


Format 


X.SWIZZLE rd=rc,icopy,iswap 
rd=xswizzle(rc,icopy,iswap) 

31 26 25 24 23 18 17 12 11 6 5 0

| X.SWIZZLE | ih | rd | rc | icopya | iswapa |

6 2 6 6 6 6

icopya <- icopy_{5..0}
iswapa <- iswap_{5..0}
ih <- icopy_{6} || iswap_{6}

Fig. 47A 


Definition 

def GroupSwizzleImmediate(ih,rd,rc,icopya,iswapa) as
icopy <- ih_{1} || icopya
iswap <- ih_{0} || iswapa
c <- RegRead(rc, 128)
for i <- 0 to 127

a_{i} <- c_{(i & icopy) ^ iswap}
endfor 

RegWrite(rd, 128, a) 
enddef 

Exceptions 

none 

Fig. 47B 



16-bit reverse 


Fig. 47C 


I X.SELECT.8 | Crossbar select bytes 

Format 


op ra=rd,rc,rb 


ra=op(rd,rc,rb) 

31 24 23 18 17 12 11 6 5 0

| op | rd | rc | rb | ra |

8 6 6 6 6

Fig. 47D


Definition 

def CrossbarTernary(op,rd,rc,rb,ra) as
d <- RegRead(rd, 128)
c <- RegRead(rc, 128)
b <- RegRead(rb, 128)
dc <- d || c
for i <- 0 to 15

j <- b_{8*i+4..8*i}

a_{8*i+7..8*i} <- dc_{8*j+7..8*j}
endfor 

RegWrite(ra, 128, a) 
enddef 

Exceptions 

none 

Fig. 47E 


Pin summary 


A20M# 

I 

Address bit 20 Mask is an emulator signal. 

A31..A3 

IO 

Address, in combination with byte enable, indicate the 
physical addresses of memory or device that is the target 
of a bus transaction. This signal is an output, when the 
processor is initiating the bus transaction, and an input 
when the processor is receiving an inquire transaction or 
snooping another processor's bus transaction. 

ADS# 

IO 

ADdress Strobe, when asserted, indicates new bus 
transaction by the processor, with valid address and byte 
enable simultaneously driven. 

ADSC# 

0 

Address Strobe Copy is driven identically to address 
strobe 

AHOLD 

I 

Address HOLD, when asserted, causes the processor to 
cease driving address and address parity in the next bus 
clock cycle. 

AP 

IO 

Address Parity contains even parity on the same cycle as 
address. Address parity is generated by the processor 
when address is an output, and is checked when address 
is an input. A parity error causes a bus error machine 
check. 

APCHK# 

0 

Address Parity CHecK is asserted two bus clocks after 
EADS# if address parity is not even parity of address.

APICEN 

I 

Advanced Programmable Interrupt Controller ENable 

is not implemented. 

BE7#..BE0# 

IO 

Byte Enable indicates which bytes are the subject of a 
read or write transaction and are driven on the same cycle 
as address. 

BF1..BF0 

I 

Bus Frequency is sampled to permit software to select 
the ratio of the processor clock to the bus clock. 

BOFF# 

I 

BackOFF is sampled on the rising edge of each bus clock, 
and when asserted, the processor floats bus signals on the 
next bus clock and aborts the current bus cycle, until the 
backoff signal is sampled negated. 

BP3..BP0 

o 

Breakpoint is an emulator signal. 

BRDY# 

I 

Bus ReaDY indicates that valid data is present on data on 
a read transaction, or that data has been accepted on a 
write transaction.

BRDYC# 

I 

Bus ReaDY Copy is identical to BRDY#; asserting either 
signal has the same effect. 

BREQ 

0 

Bus REQuest indicates a processor initiated bus request. 


Fig. 48 


BUSCHK# 

1 

BUS CHecK is sampled on the rising edge of the bus 
clock, and when asserted, causes a bus error machine 
check. 

CACHE# 

0 

CACHE, when asserted, indicates a cacheable read 
transaction or a burst write transaction. 

CLK 

1 

bus CLocK provides the bus clock timing edge and the 
frequency reference for the processor clock. 

CPUTYP 

1 

CPU TYPe, if low indicates the primary processor, if high, 
the dual processor. 

D/C# 

1 

Data/Code is driven with the address signal to indicate 
data, code, or special cycles. 

D63..D0 

IO 

Data communicates 64 bits of data per bus clock. 

D/P# 

0 

Dual/Primary is driven (asserted, low) with address on 
the primary processor 

DP7..DP0 

10 

Data Parity contains even parity on the same cycle as 
data. A parity error causes a bus error machine check. 

DPEN# 

10 

Dual Processing Enable is asserted (driven low) by a 
Dual processor at reset and sampled by a Primary 
processor at the falling edge of reset. 

EADS# 

1 

External Address Strobe indicates that an external 
device has driven address for an inquire cycle. 

EWBE# 

1 

External Write Buffer Empty indicates that the external 
system has no pending write. 

FERR# 

0 

Floating point ERRor is an emulator signal. 

FLUSH# 

1 

cache FLUSH is an emulator signal. 

FRCMC# 

1 

Functional Redundancy Checking Master/Checker is 

not implemented. 

HIT# 

IO 

HIT indicates that an inquire cycle or cache snoop hits a 
valid line. 

HITM#

IO

HIT to a Modified line indicates that an inquire cycle or
cache snoop hits a sub-block in the M cache state.

HLDA

O

bus HoLD Acknowledge is asserted (driven high) to
acknowledge a bus hold request

HOLD 

1 

bus HOLD request causes the processor to float most of
its pins and assert bus hold acknowledge after completing
all outstanding bus transactions, or during reset.

IERR# 

0 

Internal ERRor is an emulator signal. 

IGNNE# 

1 

IGNore Numeric Error is an emulator signal. 

INIT 

1 

INITialization is an emulator signal. 

INTR 

1 

maskable INTeRrupt is an emulator signal. 

INV 

1 

INValidation controls whether to invalidate the addressed
cache sub-block on an inquire transaction.


Fig. 48 (cont'd) 


KEN# 

I 

Cache ENable is driven with address to indicate that the 
read or write transaction is cacheable. 

LINT1..LINT0 

I 

Local INTerrupt is not implemented. 

LOCK# 

0 

bus LOCK is driven starting with address and ending 
after bus ready to indicate a locked series of bus 
transactions. 

M/IO# 

0 

Memory/Input Output is driven with address to indicate a 
memory or I/O transaction. 

NA# 

I 

Next Address indicates that the external system will 
accept an address for a new bus cycle in two bus clocks. 

NMI 

I 

Non Maskable Interrupt is an emulator signal. 

PBGNT# 

10 

Private Bus GraNT is driven between Primary and Dual 
processors to indicate that bus arbitration has completed, 
granting a new master access to the bus. 

PBREQ# 

IO 

Private Bus REQuest is driven between Primary and Dual 
processors to request a new master access to the bus. 

PCD 

0 

Page Cache Disable is driven with address to indicate a 
not cacheable transaction. 

PCHK# 

0 

Parity CHecK is asserted (driven low) two bus clocks after 
data appears with odd parity on enabled bytes. 

PHIT# 

IO 

Private HIT is driven between Primary and Dual 
processors to indicate that the current read or write 
transaction addresses a valid cache sub-block in the slave 
processor. 

PHITM# 

IO 

Private HIT Modified is driven between Primary and Dual 
processors to indicate that the current read or write 
transaction addresses a modified cache sub-block in the 
slave processor. 

PICCLK 

I 

Programmable Interrupt Controller CLocK is not 

implemented. 

PICD1..PICD0

IO 

Programmable Interrupt Controller Data is not 

implemented. 

PEN# 

I 

Parity Enable, if active on the data cycle, allows a parity 
error to cause a bus error machine check. 

PM1..PM0 

0 

Performance Monitoring is an emulator signal. 

PRDY 

0 

Probe ReaDY is not implemented. 

PWT 

0 

Page Write Through is driven with address to indicate a 
not write allocate transaction. 

R/S# 

I 

Run/Stop is not implemented. 

RESET 

I 

RESET causes a processor reset. 

SCYC 

0 

Split CYCle is asserted during bus lock to indicate that
more than two transactions are in the series of bus 
transactions. 


Fig. 48 (cont'd) 


SMI#

I

System Management Interrupt is an emulator signal.

SMIACT# 

0 

System Management Interrupt ACTive is an emulator 
signal. 

STPCLK# 

1 

SToP CLocK is an emulator signal. 

TCK

I

Test CLocK follows IEEE 1149.1.

TDI

I

Test Data Input follows IEEE 1149.1.

TDO

O

Test Data Output follows IEEE 1149.1.

TMS

I

Test Mode Select follows IEEE 1149.1.

TRST#

I

Test ReSeT follows IEEE 1149.1.

VCC2 

1 

VCC of 2.8V at 25 pins 

VCC3 

1 

VCC of 3.3V at 28 pins 

VCC2DET# 

0 

VCC2 DETect sets appropriate VCC2 voltage level. 

VSS 

1 

VSS supplied at 53 pins 

W/R# 

0 

Write/Read is driven with address to indicate write vs. 
read transaction. 

WB/WT# 

1 

Write Back/Write Through is returned to indicate that 
data is permitted to be cached as write back. 


Fig. 48 (cont'd) 


Electrical Specifications 


Clock rate

66 MHz 

75 MHz 

100 MHz 

133 MHz 


Parameter

min 

max 

min 

max 

min 

max 

min 

max 

unit 

CLK frequency 

33.3 

66.7 

37.5 

75 

50 

a r\r\ 

100 


A O O 

133 

MHz

CLK period 

15.0 

30.0 

13.3 

26.3 

10.0 

20.0 



ns 

CLK high time (>2v) 

4.0 


4.0 


3.0 




ns 

CLK low time (<0.8V) 

4.0 


4.0 


3.0 




ns 

CLK rise time (0.8V->2V) 

0.15 

1.5 

0.15 

1.5 

0.15 

1.5 



ns 

CLK fall time (2V->0.8V) 

0.15 

1.5 

0.15 

1.5 

0.15 

1.5 



ns 

CLK period stability 


250 


250 


250 



ps 


Fig. 49A 


A31..3 valid delay 

1.1 

6.3 

1.1 

4.5 

1.1 

4.0 



ns 

A31..3 float delay 


10.0 


7.0 


7.0 


< 

ns 

ADS# valid delay 

1.0 

6.0 

1.0 

4.5 

1.0 

4.0 



ns 

ADS# float delay 


10.0 


7.0 


7.0 



ns 

ADSC# valid delay 

1.0 

7.0 

1.0 

4.5 

1.0 

4.0 



ns 

ADSC# float delay 


10.0 


7.0 


7.0 



ns 

AP valid delay 

1.0 

8.5 

1.0 

5.5 

1.0 

5.5 



ns 

AP float delay 


10.0 


7.0 


7.0 



ns 

APCHK# valid delay 

1.0 

8.3 

1.0 

4.5 

1.0 

4.5 



ns 

BE7..0# valid delay 

1.0 

7.0 

1.0 

4.5 

1.0 

4.0 



ns 

BE7..0# float delay 


10.0 


7.0 


7.0 



ns 

BP3..0 valid delay

1.0 

10.0 







ns 

BREQ valid delay 

1.0 

8.0 

1.0 

4.5 

1.0 

4.0 



ns 

CACHE# valid delay 

1.0 

7.0 

1.0 

4.5 

1.0 

4.0 



ns 

CACHE# float delay 


10.0 


7.0 


7.0 



ns 

D/C# valid delay 

1.0 

7.0 

1.0 

4.5 

1.0 

4.0 



ns 

D/C# float delay 


10.0 


7.0 


7.0 



ns 

D63..0 write data valid delay 

1.3 

7.5 

1.3 

4.5 

1.3 

4.5 



ns 

D63..0 write data float delay 


10.0 


7.0 


7.0 



ns 

DP7..0 write data valid delay 

1.3 

7.5 

1.3 

4.5 

1.3 

4.5 



ns 

DP7..0 write data float delay 


10.0 


7.0 


7.0 



ns 

FERR# valid delay 

1.0 

8.3 

1.0 

4.5 

1.0 

4.5 



ns 

HIT# valid delay 

1.0 

6.8 

1.0 

4.5 

1.0 

4.0 



ns 

HITM# valid delay 

1.1 

6.0 

1.1 

4.5 

1.1 

4.0 



ns 

HLDA valid delay 

1.0 

6.8 

1.0 

4.5 

1.0 

4.0 



ns 

IERR# valid delay 

1.0 

8.3 







ns 

LOCK# valid delay 

1.1

7.0 

1.1 

4.5 

1.1 

4.0 



ns 

LOCK# float delay 


10.0 


7.0 


7.0 



ns 

M/IO# valid delay 

1.0 

5.9 

1.0 

4.5 

1.0 

4.0 



ns 

M/IO# float delay 


10.0 


7.0


7.0 



ns 

PCD valid delay 

1.0 

7.0 

1.0 

4.5 

1.0 

4.0 



ns 

PCD float delay 


10.0 


7.0 


7.0 



ns 

PCHK# valid delay 

1.0 

7.0 

1.0 

4.5

1.0 

4.5 



ns 

PM1..0 valid delay

1.0 

10.0 







ns 

PRDY valid delay

1.0 

8.0 







ns 

PWT valid delay 

1.0 

7.0 

1.0 

4.5 

1.0 

4.0 



ns 

PWT float delay


10.0 


7.0 


7.0



ns 

SCYC valid delay 

1.0 

7.0 

1.0 

4.5 

1.0

4.0 



ns 

SCYC float delay 


10.0 


7.0 


7.0 



ns 

SMIACT# valid delay 

1.0 

7.3 

1.0 

4.5 

1.0 

4.0 



ns 

W/R# valid delay 

1.0 

7.0 

1.0 

4.5 

1.0 

4.0 



ns 

W/R# float delay 


10.0 


7.0 


7.0 



ns 
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A31..5 setup time 

6.0 


3.0 


3.0 




ns 

A31..5 hold time 

1.0 


1.0 


1.0 




ns 

A20M# setup time 

5.0 


3.0 


3.0 




ns 

A20M# hold time 

1.0 


1.0 


1.0 




ns 

AHOLD setup time 

5.5 


3.5 


3.5 




ns 

AHOLD hold time 

1.0 


1.0 


1.0 




ns 

AP setup time 

5.0 


1.7 


1.7 




ns 

AP hold time 

1.0 


1.0 


1.0 




ns 

BOFF# setup time 

5.5 


3.5 


3.5 




ns 

BOFF# hold time 

1.0 


1.0 


1.0 




ns 

BRDY# setup time 

5.0 


3.0 


3.0 




ns 

BRDY# hold time 

1.0 


1.0 


1.0 




ns 

BRDYC# setup time 

5.0 


3.0 


3.0 




ns 

BRDYC# hold time 

1.0 


1.0 


1.0 




ns 

BUSCHK# setup time 

5.0 


3.0 


3.0 




ns 

BUSCHK# hold time 

1.0 


1.0 


1.0 




ns 

D63..0 read data setup time 

2.8 


1.7 


1.7 




ns 

D63..0 read data hold time 

1.5 


1.5 


1.5 




ns 

DP7..0 read data setup time 

2.8 


1.7 


1.7 




ns 

DP7..0 read data hold time 

1.5 


1.5 


1.5 




ns 

EADS# setup time 

5.0 


3.0 


3.0 




ns 

EADS# hold time 

1.0 


1.0 


1.0 




ns 

EWBE# setup time 

5.0 


1.7 


1.7 




ns 

EWBE# hold time 

1.0 


1.0 


1.0 




ns 

FLUSH# setup time 

5.0 


1.7 


1.7 




ns 

FLUSH# hold time 

1.0 


1.0 


1.0 




ns 

FLUSH# async pulse width

2 


2 


2 




CLK 

HOLD setup time 

5.0 


1.7 


1.7 




ns 

HOLD hold time 

1.5 


1.5 


1.5 




ns 

IGNNE# setup time 

5.0 


1.7 


1.7 




ns

IGNNE# hold time 

1.0 


1.0 


1.0 




ns 

IGNNE# async pulse width

2 


2 


2 




CLK 

INIT setup time

5.0 


1.7 


1.7 




ns 

INIT hold time 

1.0 


1.0 


1.0 




ns 

INIT async pulse width

2 


2 


2 




CLK 

INTR setup time 

5.0 


1.7 


1.7 




ns 

INTR hold time 

1.0 


1.0 


1.0 




ns 

INV setup time 

5.0 


1.7 


1.7 




ns

INV hold time 

1.0 


1.0 


1.0 




ns 

KEN# setup time 

5.0 


3.0 


3.0 




ns 

KEN# hold time 

1.0 


1.0 


1.0 




ns 

NA# setup time 

4.5 


1.7 


1.7 




ns 
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NA# hold time 

1.0 


1.0 


1.0 




ns 

NMI setup time 

5.0 


1.7 


1.7 




ns 

NMI hold time 

1.0 


1.0 


1.0 




ns 

NMI async pulse width

2 


2 


2 




CLK 

PEN# setup time 

4.8 


1.7 


1.7 




ns 

PEN# hold time 

1.0 


1.0 


1.0 




ns 

R/S# setup time 

5.0 


1.7 


1.7 




ns 

R/S# hold time 

1.0 


1.0 


1.0 




ns 

R/S# async pulse width

2 


2 


2 




CLK 

SMI# setup time 

5.0 


1.7 


1.7 




ns 

SMI# hold time 

1.0 


1.0 


1.0 




ns 

SMI# async pulse width

2 


2 


2 




CLK 

STPCLK# setup time 

5.0 


1.7 


1.7 




ns 

STPCLK# hold time 

1.0 


1.0 


1.0 




ns 

WB/WT# setup time 

4.5 


1.7 


1.7 




ns 

WB/WT# hold time 

1.0 


1.0 


1.0 




ns 
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RESET setup time 

5.0 


1.7 


1.7 




ns 

RESET hold time 

1.0 


1.0 


1.0 




ns 

RESET pulse width 

15 


15 


15 




CLK 

RESET active 

1.0 


1.0 


1.0 




ms 

BF2..0 setup time 

1.0 


1.0 


1.0 




ms 

BF2..0 hold time 

2 


2 


2 




CLK 

BRDYC# hold time 

1.0 


1.0 


1.0 




ns 

BRDYC# setup time 

2 


2 


2 




CLK 

BRDYC# hold time 

2 


2 


2 




CLK 

FLUSH# setup time 

5.0 


1.7 


1.7 




ns 

FLUSH# hold time 

1.0 


1.0 


1.0 




ns 

FLUSH# setup time 

2 


2 


2 




CLK 

FLUSH# hold time 

2 


2 


2 




CLK 


Fig. 49D 


PBREQ# flight time

0 

2.0 







ns 

PBGNT# flight time

0 

2.0 







ns 

PHIT# flight time

0 

2.0 







ns 

PHITM# flight time

0 

1.8 







ns 

A31..5 setup time 

3.7 








ns 

A31..5 hold time 

0.8 








ns 

D/C# setup time 

4.0 








ns

D/C# hold time 

0.8 








ns 

W/R# setup time 

4.0 








ns 

W/R# hold time 

0.8 








ns 

CACHE# setup time 

4.0 








ns 

CACHE# hold time 

1.0 








ns 

LOCK# setup time 

4.0 








ns 

LOCK# hold time 

0.8 








ns 

SCYC setup time 

4.0 








ns 

SCYC hold time 

0.8 








ns 

ADS# setup time 

5.8 








ns

ADS# hold time 

0.8 








ns 

M/IO# setup time 

5.8 








ns 

M/IO# hold time 

0.8 








ns 

HIT# setup time 

6.0 








ns 

HIT# hold time 

1.0 








ns 

HITM# setup time 

6.0 








ns 

HITM# hold time 

0.7 








ns 

HLDA setup time 

6.0 








ns 

HLDA hold time 

0.8 








ns 

DPEN# valid time 


10.0 







CLK 

DPEN# hold time 

2.0 








CLK 

D/P# valid delay (primary)

1.0 

8.0 







ns 


Fig. 49E 


TCK frequency 


25 




25 



MHz

TCK period 

40.0 




40.0 




ns 

TCK high time (>2v) 

14.0 




14.0 




ns 

TCK low time (<0.8V) 

14.0 




14.0 




ns 

TCK rise time (0.8V->2V) 


5.0 




5.0 



ns 

TCK fall time (2V->0.8V) 


5.0 




5.0 



ns 

TRST# pulse width 

30.0 




30.0 




ns 


Fig. 49F 


TDI setup time 

5.0 




5.0 




ns 

TDI hold time 

9.0 




9.0 




ns 

TMS setup time 

5.0 




5.0 




ns 

TMS hold time 

9.0 




9.0 




ns

TDO valid delay 

3.0 

13.0 



3.0 

13.0 



ns 

TDO float delay 


16.0 




16.0 



ns 

all outputs valid delay 

3.0 

13.0 



3.0 

13.0 



ns 

all outputs float delay 


16.0 




16.0 



ns 

all inputs setup time 

5.0 




5.0 




ns 

all inputs hold time 

9.0 




9.0 




ns 


Fig. 49G 


Operation codes 


L.8

Load signed byte

L.16.B

Load signed doublet big-endian 

L.16.A.B 

Load signed doublet aligned big-endian 

L.16.L 

Load signed doublet little-endian 

L.16.A.L 

Load signed doublet aligned little-endian 

L.32.B 

Load signed quadlet big-endian 

L.32.A.B 

Load signed quadlet aligned big-endian 

L.32.L 

Load signed quadlet little-endian 

L.32.A.L 

Load signed quadlet aligned little-endian 

L.64.B 

Load signed octlet big-endian 

L.64.A.B 

Load signed octlet aligned big-endian 

L.64.L 

Load signed octlet little-endian

L.64.A.L 

Load signed octlet aligned little-endian 

L.128.B

Load hexlet big-endian 

L.128.A.B 

Load hexlet aligned big-endian 

L.128.L 

Load hexlet little-endian 

L.128.A.L 

Load hexlet aligned little-endian 

L.U.8 

Load unsigned byte

L.U.16.B

Load unsigned doublet big-endian 

L.U.16.A.B 

Load unsigned doublet aligned big-endian 

L.U.16.L 

Load unsigned doublet little-endian 

L.U.16.A.L 

Load unsigned doublet aligned little-endian 

L.U.32.B 

Load unsigned quadlet big-endian 

L.U.32.A.B

Load unsigned quadlet aligned big-endian 

L.U.32.L 

Load unsigned quadlet little-endian 

L.U.32.A.L 

Load unsigned quadlet aligned little-endian 

L.U.64.B 

Load unsigned octlet big-endian

L.U.64.A.B 

Load unsigned octlet aligned big-endian 

L.U.64.L 

Load unsigned octlet little-endian 

L.U.64.A.L 

Load unsigned octlet aligned little-endian 


Fig. 50A 


Selection 


number format

type 

size 

alignment 

ordering 

signed byte 


8 



unsigned byte 

U 

8 



signed integer 


16 32 64 


L B 

signed integer aligned 


16 32 64 

A 

L B 

unsigned integer 

U 

16 32 64 


L B 

unsigned integer aligned 

u 

16 32 64 

A 

L B 

register 


128 


L B 

register aligned 


128 

A 

L B 


Format 


op rd=rc,rb 
rd=op(rc,rb) 

31 24 23 18 17 12 11 6 5 0 

| L.MINOR | rd | rc | rb | op |


Fig. 50B 


Definition 


def Load(op,rd,rc,rb) as 
case op of 

L16L, L32L, L8, L16AL, L32AL, L16B, L32B, L16AB, L32AB, 
L64L, L64AL, L64B, L64AB: 
signed <- true 

LU16L, LU32L, LU8, LU16AL, LU32AL, LU16B, LU32B, LU16AB, LU32AB, 
LU64L, LU64AL, LU64B, LU64AB: 

signed <- false
L128L, L128AL, L128B, L128AB: 

signed <- undefined 

endcase 
case op of 
L8, LU8: 

size <- 8 

L16L, LU16L, L16AL, LU16AL, L16B, LU16B, L16AB, LU16AB: 
size <- 16 

L32L, LU32L, L32AL, LU32AL, L32B, LU32B, L32AB, LU32AB: 
size <- 32 

L64L, LU64L, L64AL, LU64AL, L64B, LU64B, L64AB, LU64AB: 

size <- 64 
L128L, L128AL, L128B, L128AB: 

size <- 128 

endcase 

lsize <- log(size)

case op of 

L16L, LU16L, L32L, LU32L, L64L, LU64L, L128L, 
L16AL, LU16AL, L32AL, LU32AL, L64AL, LU64AL, L128AL: 
order <- L

L16B, LU16B, L32B, LU32B, L64B, LU64B, L128B,

L16AB, LU16AB, L32AB, LU32AB, L64AB, LU64AB, L128AB:

order <- B
L8, LU8:

order <- undefined 

endcase 


Fig. 50C 


c <- RegRead(rc, 64) 

b <- RegRead(rb, 64) 

VirtAddr <- c + (b_{66-lsize..0} || 0^{lsize-3})

case op of 

L16AL, LU16AL, L32AL, LU32AL, L64AL, LU64AL, L128AL, 
L16AB, LU16AB, L32AB, LU32AB, L64AB, LU64AB, L128AB: 
if (c_{lsize-4..0} ≠ 0) then

raise AccessDisallowedByVirtualAddress 

endif 

L16L, LU16L, L32L, LU32L, L64L, LU64L, L128L, 
L16B, LU16B, L32B, LU32B, L64B, LU64B, L128B: 
L8, LU8:
endcase 

m <- LoadMemory(c,VirtAddr,size,order)
a <- (m_{size-1} and signed)^{128-size} || m
RegWrite(rd, 128, a) 
enddef 

Exceptions 

Access disallowed by virtual address 
Access disallowed by tag 
Access disallowed by global TB 
Access disallowed by local TB 
Access detail required by tag 
Access detail required by local TB 
Access detail required by global TB 
Local TB miss 
Global TB miss 

Fig. 50C (cont) 


Operation codes


L.I.8

Load immediate signed byte

L.I.16.A.B

Load immediate signed doublet aligned big-endian 

L.I.16.B 

Load immediate signed doublet big-endian 

L.I.16.A.L

Load immediate signed doublet aligned little-endian 

L.I.16.L 

Load immediate signed doublet little-endian 

L.I.32.A.B 

Load immediate signed quadlet aligned big-endian 

L.I.32.B

Load immediate signed quadlet big-endian 

L.I.32.A.L 

Load immediate signed quadlet aligned little-endian 

L.I.32.L

Load immediate signed quadlet little-endian

L.I.64.A.B

Load immediate signed octlet aligned big-endian 

L.I.64.B 

Load immediate signed octlet big-endian 

L.I.64.A.L

Load immediate signed octlet aligned little-endian 

L.I.64.L 

Load immediate signed octlet little-endian 

L.I.128.A.B 

Load immediate hexlet aligned big-endian 

L.I.128.B 

Load immediate hexlet big-endian 

L.I.128.A.L

Load immediate hexlet aligned little-endian 

L.I.128.L 

Load immediate hexlet little-endian 

L.I.U.8 

Load immediate unsigned byte 

L.I.U.16.A.B 

Load immediate unsigned doublet aligned big-endian 

L.I.U.16.B

Load immediate unsigned doublet big-endian 

L.I.U.16.A.L 

Load immediate unsigned doublet aligned little-endian 

L.I.U.16.L

Load immediate unsigned doublet little-endian 

L.I.U.32.A.B

Load immediate unsigned quadlet aligned big-endian

L.I.U.32.B

Load immediate unsigned quadlet big-endian

L.I.U.32.A.L

Load immediate unsigned quadlet aligned little-endian

L.I.U.32.L

Load immediate unsigned quadlet little-endian

L.I.U.64.A.B

Load immediate unsigned octlet aligned big-endian

L.I.U.64.B

Load immediate unsigned octlet big-endian

L.I.U.64.A.L

Load immediate unsigned octlet aligned little-endian

L.I.U.64.L

Load immediate unsigned octlet little-endian 


Fig. 51A 


Selection 


number format

type 

size 

alignment

ordering

signed byte 


8 



unsigned byte 

U 

8 



signed integer 


16 32 64 


L B 

signed integer aligned 


16 32 64 

A 

L B 

unsigned integer 

U 

16 32 64 


L B 

unsigned integer aligned 

U 

16 32 64 

A 

L B 

register 


128 


L B 

register aligned 


128 

A 

L B


Format 


op rd=rc,offset 
rd=op(rc,offset) 

31 24 23 18 17 12 11 0 

| op | rd | rc | offset |

8 6 6 12 


Fig. 51 B 


Definition 


def LoadImmediate(op,rd,rc,offset) as
case op of 

LI16L, LI32L, LI8, LI16AL, LI32AL, LI16B, LI32B, LI16AB, LI32AB,
LI64L, LI64AL, LI64B, LI64AB:

signed <- true
LIU16L, LIU32L, LIU8, LIU16AL, LIU32AL,
LIU16B, LIU32B, LIU16AB, LIU32AB,
LIU64L, LIU64AL, LIU64B, LIU64AB:

signed <- false 
LI128L, LI128AL, LI128B, LI128AB: 

signed <- undefined 

endcase 
case op of 

LI8, LIU8: 
size <- 8 

LI16L, LIU16L, LI16AL, LIU16AL, LI16B, LIU16B, LI16AB, LIU16AB:
size <- 16

LI32L, LIU32L, LI32AL, LIU32AL, LI32B, LIU32B, LI32AB, LIU32AB:
size <- 32

LI64L, LIU64L, LI64AL, LIU64AL, LI64B, LIU64B, LI64AB, LIU64AB:

size <- 64

LI128L, LI128AL, LI128B, LI128AB: 

size <- 128 

endcase 

lsize <- log(size)

case op of 

LI16L, LIU16L, LI32L, LIU32L, LI64L, LIU64L, LI128L, 
LI16AL, LIU16AL, LI32AL, LIU32AL, LI64AL, LIU64AL, LI128AL: 
order <- L

LI16B, LIU16B, LI32B, LIU32B, LI64B, LIU64B, LI128B, 

LI16AB, LIU16AB, LI32AB, LIU32AB, LI64AB, LIU64AB, LI128AB: 

order <- B 
LI8, LIU8: 

order <- undefined

endcase 


Fig. 51 C 


c <- RegRead(rc, 64) 

VirtAddr <- c + (offset_{11}^{55-lsize} || offset || 0^{lsize-3})
case op of 

LI16AL, LIU16AL, LI32AL, LIU32AL, LI64AL, LIU64AL, LI128AL, 
LI16AB, LIU16AB, LI32AB, LIU32AB, LI64AB, LIU64AB, LI128AB: 
if (c_{lsize-4..0} ≠ 0) then

raise AccessDisallowedByVirtualAddress 

endif 

LI16L, LIU16L, LI32L, LIU32L, LI64L, LIU64L, LI128L, 
LI16B, LIU16B, LI32B, LIU32B, LI64B, LIU64B, LI128B: 
LI8, LIU8: 
endcase 

m <- LoadMemory(c,VirtAddr,size,order)
a <- (m_{size-1} and signed)^{128-size} || m
RegWrite(rd, 128, a) 
enddef 

Exceptions 

Access disallowed by virtual address 
Access disallowed by tag 
Access disallowed by global TB 
Access disallowed by local TB 
Access detail required by tag 
Access detail required by local TB 
Access detail required by global TB 
Local TB miss 
Global TB miss 

Fig. 51C(cont) 


Operation codes 


S.8 

Store byte 

S.16.B 

Store double big-endian 

S.16.A.B 

Store double aligned big-endian 

S.16.L 

Store double little-endian 

S.16.A.L 

Store double aligned little-endian 

S.32.B 

Store quadlet big-endian 

S.32.A.B 

Store quadlet aligned big-endian 

S.32.L 

Store quadlet little-endian 

S.32.A.L 

Store quadlet aligned little-endian 

S.64.B 

Store octlet big-endian 

S.64.A.B 

Store octlet aligned big-endian 

S.64.L 

Store octlet little-endian 

S.64.A.L 

Store octlet aligned little-endian 

S.128.B 

Store hexlet big-endian 

S.128.A.B 

Store hexlet aligned big-endian 

S.128.L 

Store hexlet little-endian 

S.128.A.L 

Store hexlet aligned little-endian 

S.MUX.64.A.B 

Store multiplex octlet aligned big-endian 

S.MUX.64.A.L 

Store multiplex octlet aligned little-endian 


Fig. 52A 


Selection


number format 

op 

size 

alignment 

ordering 

byte 


8 



integer 


16 32 64 128 


L B

integer aligned 


16 32 64 128 

A 

L B 

multiplex 

MUX 

64 

A 

L B 


Format 


op rd,rc,rb
op(rd,rc,rb) 

31 24 23 18 17 12 11 6 5 0 

| S.MINOR | rd | rc | rb | op |

8 6 6 6 6 

Fig. 52B 


Definition 


def Store(op,rd,rc,rb) as 
case op of 
S8: 

size <- 8
S16L, S16AL, S16B, S16AB:

size <- 16
S32L, S32AL, S32B, S32AB:

size <- 32
S64L, S64AL, S64B, S64AB, 
SMUX64AB, SMUX64AL: 

size <- 64 
S128L, S128AL, S128B, S128AB: 

size <- 128 

endcase 
lsize <- log(size)
case op of 
S8: 

order <- undefined 
S16L, S32L, S64L, S128L,
S16AL, S32AL, S64AL, S128AL, SMUX64AL:

order <- L
S16B, S32B, S64B, S128B,
S16AB, S32AB, S64AB, S128AB, SMUX64AB:

order <- B 

endcase 

c <- RegRead(rc, 64)

b <- RegRead(rb, 64) 

VirtAddr <- c + (b_{66-lsize..0} || 0^{lsize-3})

case op of 

S16AL, S32AL, S64AL, S128AL, 
S16AB, S32AB, S64AB, S128AB, 
SMUX64AB, SMUX64AL: 
if (c_{lsize-4..0} ≠ 0) then

raise AccessDisallowedByVirtualAddress

endif 

S16L, S32L, S64L, S128L,
S16B, S32B, S64B, S128B: 
S8: 
endcase 


Fig. 52C 


d <- RegRead(rd, 128) 
case op of 
S8, 

S16L, S16AL, S16B, S16AB,
S32L, S32AL, S32B, S32AB,
S64L, S64AL, S64B, S64AB,
S128L, S128AL, S128B, S128AB:

StoreMemory(c,VirtAddr,size,order,d_{size-1..0})
SMUX64AB, SMUX64AL: 

lock 

a <- LoadMemoryW(c,VirtAddr,size,order)
m <- (d_{127..64} & d_{63..0}) | (a & ~d_{63..0})
StoreMemory(c,VirtAddr,size,order,m)
endlock 

endcase 
enddef 

Exceptions 

Access disallowed by virtual address 
Access disallowed by tag 
Access disallowed by global TB 
Access disallowed by local TB 
Access detail required by tag 
Access detail required by local TB 
Access detail required by global TB 
Local TB miss 
Global TB miss 

Fig. 52C (cont) 


Operation codes 


S.I.8

Store immediate byte 

S.I.16.A.B 

Store immediate double aligned big-endian

S.I.16.B 

Store immediate double big-endian 

S.I.16.A.L 

Store immediate double aligned little-endian 

S.I.16.L 

Store immediate double little-endian 

S.I.32.A.B 

Store immediate quadlet aligned big-endian 

S.I.32.B 

Store immediate quadlet big-endian

S.I.32.A.L 

Store immediate quadlet aligned little-endian 

S.I.32.L 

Store immediate quadlet little-endian 

S.I.64.A.B 

Store immediate octlet aligned big-endian 

S.I.64.B 

Store immediate octlet big-endian 

S.I.64.A.L 

Store immediate octlet aligned little-endian 

S.I.64.L 

Store immediate octlet little-endian 

S.I.128.A.B 

Store immediate hexlet aligned big-endian 

S.I.128.B 

Store immediate hexlet big-endian 

S.I.128.A.L 

Store immediate hexlet aligned little-endian 

S.I.128.L 

Store immediate hexlet little-endian 

S.MUXI.64.A.B 

Store multiplex immediate octlet aligned big-endian 

S.MUXI.64.A.L 

Store multiplex immediate octlet aligned little-endian 


Fig. 53A 


Selection


number format 

op 

size 

alignment 

ordering 

byte 


8 



integer 


16 32 64 128 


L B 

integer aligned 


16 32 64 128 

A 

L B 

multiplex 

MUX 

64 

A 

L B 


Format 


S.op.I.size.align.order rd,rc,offset
sopisizealignorder(rd,rc,offset) 

31 24 23 18 17 12 11 0 

| op | rd | rc | offset |

8 6 6 12 

Fig.53B 


Definition 


def StoreImmediate(op,rd,rc,offset) as
case op of 
SI8: 

size <- 8 
SI16L, SI16AL, SI16B, SI16AB:

size <- 16 
SI32L, SI32AL, SI32B, SI32AB: 

size <- 32 

SI64L, SI64AL, SI64B, SI64AB, SMUXI64AB, SMUXI64AL: 

size <- 64 
SI128L, SI128AL, SI128B, SI128AB: 

size <- 128 

endcase 
lsize <- log(size)
case op of 
SI8: 

order <- undefined 
SI16L, SI32L, SI64L, SI128L, 
SI16AL, SI32AL, SI64AL, SI128AL, SMUXI64AL: 

order <- L 
SI16B, SI32B, SI64B, SI128B, 
SI16AB, SI32AB, SI64AB, SI128AB, SMUXI64AB: 

order <- B 

endcase 

c <- RegRead(rc, 64)

VirtAddr <- c + (offset_{11}^{55-lsize} || offset || 0^{lsize-3})
case op of 

SI16AL, SI32AL, SI64AL, SI128AL, 
SI16AB, SI32AB, SI64AB, SI128AB, 
SMUXI64AB, SMUXI64AL: 
if (c_{lsize-4..0} ≠ 0) then

raise AccessDisallowedByVirtualAddress 

endif 

SI16L, SI32L, SI64L, SI128L, 
SI16B, SI32B, SI64B, SI128B: 
SI8: 
endcase 


Fig. 53C 


d <- RegRead(rd, 128) 
case op of 
SI8, 

SI16L, SI16AL, SI16B, SI16AB, 
SI32L, SI32AL, SI32B, SI32AB, 
SI64L, SI64AL, SI64B, SI64AB,
SI128L, SI128AL, SI128B, SI128AB:

StoreMemory(c,VirtAddr,size,order,d_{size-1..0})
SMUXI64AB, SMUXI64AL: 

lock 

a <- LoadMemoryW(c,VirtAddr,size,order)

m <- (d_{127..64} & d_{63..0}) | (a & ~d_{63..0})
StoreMemory(c,VirtAddr,size,order,m)
endlock 

endcase 
enddef 

Exceptions 

Access disallowed by virtual address 
Access disallowed by tag 
Access disallowed by global TB 
Access disallowed by local TB 
Access detail required by tag 
Access detail required by local TB 
Access detail required by global TB 
Local TB miss 
Global TB miss 

Fig. 53C (cont) 
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